Merge "Removing the ops-guide from openstack-manuals"

@@ -8,7 +8,7 @@ declare -A BOOKS=(
     ["de"]="install-guide"
     ["fr"]="install-guide"
     ["id"]="image-guide install-guide"
-    ["ja"]="ha-guide image-guide install-guide ops-guide"
+    ["ja"]="ha-guide image-guide install-guide"
     ["ko_KR"]="install-guide"
     ["ru"]="install-guide"
     ["tr_TR"]="image-guide install-guide arch-design"

@@ -47,7 +47,6 @@ declare -A SPECIAL_BOOKS=(
     ["image-guide"]="RST"
     ["install-guide"]="RST"
     ["networking-guide"]="RST"
-    ["ops-guide"]="RST"
     # Do not translate for now, we need to fix our scripts first to
     # generate the content properly.
     ["install-guide-debconf"]="skip"

@@ -50,8 +50,6 @@ The following books explain how to configure and run an OpenStack cloud:

 * `Configuration Reference <https://docs.openstack.org/ocata/config-reference/>`_

-* `Operations Guide <https://docs.openstack.org/ops-guide/>`_
-
 * `Networking Guide <https://docs.openstack.org/neutron/latest/admin/>`_

 * `High Availability Guide <https://docs.openstack.org/ha-guide/>`_

@@ -1,27 +0,0 @@
[metadata]
name = openstackopsguide
summary = OpenStack Operations Guide
author = OpenStack
author-email = openstack-docs@lists.openstack.org
home-page = https://docs.openstack.org/
classifier =
    Environment :: OpenStack
    Intended Audience :: Information Technology
    Intended Audience :: System Administrators
    License :: OSI Approved :: Apache Software License
    Operating System :: POSIX :: Linux
    Topic :: Documentation

[global]
setup-hooks =
    pbr.hooks.setup_hook

[files]

[build_sphinx]
warning-is-error = 1
build-dir = build
source-dir = source

[wheel]
universal = 1

@@ -1,30 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2013 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# THIS FILE IS MANAGED BY THE GLOBAL REQUIREMENTS REPO - DO NOT EDIT
import setuptools

# In python < 2.7.4, a lazy loading of package `pbr` will break
# setuptools if some other modules registered functions in `atexit`.
# solution from: http://bugs.python.org/issue15881#msg170215
try:
    import multiprocessing  # noqa
except ImportError:
    pass

setuptools.setup(
    setup_requires=['pbr'],
    pbr=True)

@@ -1,51 +0,0 @@
================
Acknowledgements
================

The OpenStack Foundation supported the creation of this book with plane
tickets to Austin, lodging (including one adventurous evening without
power after a windstorm), and delicious food. For about USD $10,000, we
could collaborate intensively for a week in the same room at the
Rackspace Austin office. The authors are all members of the OpenStack
Foundation, which you can join. Go to the `Foundation web
site <https://www.openstack.org/join>`_.

We want to acknowledge our excellent host Rackers at Rackspace in
Austin:

- Emma Richards of Rackspace Guest Relations took excellent care of our
  lunch orders and even set aside a pile of sticky notes that had
  fallen off the walls.

- Betsy Hagemeier, a Fanatical Executive Assistant, took care of a room
  reshuffle and helped us settle in for the week.

- The Real Estate team at Rackspace in Austin, also known as "The
  Victors," were super responsive.

- Adam Powell in Racker IT supplied us with bandwidth each day and
  second monitors for those of us needing more screens.

- On Wednesday night we had a fun happy hour with the Austin OpenStack
  Meetup group and Racker Katie Schmidt took great care of our group.

We also had some excellent input from outside of the room:

- Tim Bell from CERN gave us feedback on the outline before we started
  and reviewed it mid-week.

- Sébastien Han has written excellent blogs and generously gave his
  permission for re-use.

- Oisin Feeley read it, made some edits, and provided emailed feedback
  right when we asked.

Inside the book sprint room with us each day was our book sprint
facilitator Adam Hyde. Without his tireless support and encouragement,
we would have thought a book of this scope was impossible in five days.
Adam has proven the book sprint method effective again and again. He
creates both tools and faith in collaborative authoring at
`www.booksprints.net <http://www.booksprints.net/>`_.

We couldn't have pulled it off without so much supportive help and
encouragement.

@@ -1,536 +0,0 @@
=================================
Tales From the Cryp^H^H^H^H Cloud
=================================

Herein lies a selection of tales from OpenStack cloud operators. Read,
and learn from their wisdom.

Double VLAN
~~~~~~~~~~~

I was on-site in Kelowna, British Columbia, Canada setting up a new
OpenStack cloud. The deployment was fully automated: Cobbler deployed
the OS on the bare metal, bootstrapped it, and Puppet took over from
there. I had run the deployment scenario so many times in practice and
took for granted that everything was working.

On my last day in Kelowna, I was in a conference call from my hotel. In
the background, I was fooling around on the new cloud. I launched an
instance and logged in. Everything looked fine. Out of boredom, I ran
:command:`ps aux` and all of a sudden the instance locked up.

Thinking it was just a one-off issue, I terminated the instance and
launched a new one. By then, the conference call had ended and I was off
to the data center.

At the data center, I was finishing up some tasks and remembered the
lock-up. I logged into the new instance and ran :command:`ps aux` again.
It worked. Phew. I decided to run it one more time. It locked up.

After reproducing the problem several times, I came to the unfortunate
conclusion that this cloud did indeed have a problem. Even worse, my
time was up in Kelowna and I had to return to Calgary.

Where do you even begin troubleshooting something like this? An instance
that just randomly locks up when a command is issued. Is it the image?
Nope—it happens on all images. Is it the compute node? Nope—all nodes.
Is the instance locked up? No! New SSH connections work just fine!

We reached out for help. A networking engineer suggested it was an MTU
issue. Great! MTU! Something to go on! What's MTU and why would it cause
a problem?

MTU is maximum transmission unit. It specifies the maximum number of
bytes that the interface accepts for each packet. If two interfaces have
two different MTUs, bytes might get chopped off and weird things
happen—such as random session lockups.

.. note::

   Not all packets have a size of 1500. Running the :command:`ls` command over
   SSH might only create a single packet smaller than 1500 bytes.
   However, running a command with heavy output, such as :command:`ps aux`,
   requires several packets of 1500 bytes.
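
If you suspect an MTU mismatch, one low-tech way to confirm it is to ping
with the "don't fragment" bit set and step the payload size up until replies
stop coming back. This is only a sketch; the target address is a placeholder:

.. code-block:: console

   # A small payload should always make it through.
   $ ping -c 3 -M do -s 1000 192.168.1.1

   # 1472 bytes of payload + 8 bytes ICMP header + 20 bytes IP header = 1500.
   # If this size fails while the smaller one succeeds, something along the
   # path is dropping or truncating full-sized 1500-byte packets.
   $ ping -c 3 -M do -s 1472 192.168.1.1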

OK, so where is the MTU issue coming from? Why haven't we seen this in
any other deployment? What's new in this situation? Well, new data
center, new uplink, new switches, new model of switches, new servers,
first time using this model of servers… so, basically everything was
new. Wonderful. We toyed around with raising the MTU in various areas:
the switches, the NICs on the compute nodes, the virtual NICs in the
instances; we even had the data center raise the MTU for our uplink
interface. Some changes worked, some didn't. This line of
troubleshooting didn't feel right, though. We shouldn't have to be
changing the MTU in these areas.

As a last resort, our network admin (Alvaro) and I sat down with
four terminal windows, a pencil, and a piece of paper. In one window, we
ran ping. In the second window, we ran ``tcpdump`` on the cloud
controller. In the third, ``tcpdump`` on the compute node. And the fourth
had ``tcpdump`` on the instance. For background, this cloud was a
multi-node, non-multi-host setup.

One cloud controller acted as a gateway to all compute nodes.
VlanManager was used for the network config. This means that the cloud
controller and all compute nodes had a different VLAN for each OpenStack
project. We used the ``-s`` option of ``ping`` to change the packet
size. We watched as sometimes packets would fully return, sometimes they'd
only make it out and never back in, and sometimes the packets would stop at a
random point. We changed ``tcpdump`` to start displaying the hex dump of
the packet. We pinged between every combination of outside, controller,
compute, and instance.
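
To see whether a VLAN tag is present at each hop, ``tcpdump`` can be asked to
print the link-level headers and the raw hex of every frame, as described
above. A sketch of the invocation we mean; the interface name is a placeholder:

.. code-block:: console

   # -e prints the Ethernet header (including any 802.1Q tag),
   # -nn skips name resolution, -xx dumps the frame in hex.
   $ tcpdump -i bond0 -e -nn -xx icmp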

Finally, Alvaro noticed something. When a packet from the outside hits
the cloud controller, it should not be configured with a VLAN. We
verified this as true. When the packet went from the cloud controller to
the compute node, it should only have a VLAN if it was destined for an
instance. This was still true. When the ping reply was sent from the
instance, it should be in a VLAN. True. When it came back to the cloud
controller and on its way out to the Internet, it should no longer have
a VLAN. False. Uh oh. It looked as though the VLAN part of the packet
was not being removed.

That made no sense.

While bouncing this idea around in our heads, I was randomly typing
commands on the compute node:

.. code-block:: console

   $ ip a
   …
   10: vlan100@vlan20: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br100 state UP
   …

"Hey Alvaro, can you run a VLAN on top of a VLAN?"

"If you did, you'd add an extra 4 bytes to the packet…"

Then it all made sense…

.. code-block:: console

   $ grep vlan_interface /etc/nova/nova.conf
   vlan_interface=vlan20

In ``nova.conf``, ``vlan_interface`` specifies what interface OpenStack
should attach all VLANs to. The correct setting should have been:

.. code-block:: ini

   vlan_interface=bond0

as this is the server's bonded NIC.

vlan20 is the VLAN that the data center gave us for outgoing Internet
access. It's a correct VLAN and is also attached to bond0.

By mistake, I configured OpenStack to attach all tenant VLANs to vlan20
instead of bond0, thereby stacking one VLAN on top of another. This added
an extra 4 bytes to each packet and caused a packet of 1504 bytes to be
sent out, which would cause problems when it arrived at an interface that
only accepted 1500.

As soon as this setting was fixed, everything worked.
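
A quick way to spot this kind of stacking after the fact is to ask ``ip`` for
the details of a VLAN device and check which interface it hangs off. The
device names below are only examples:

.. code-block:: console

   # With -d, the output includes the 802.1Q VLAN id, and the name itself
   # (vlan100@vlan20 versus vlan100@bond0) shows what the VLAN is attached to.
   $ ip -d link show vlan100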

"The Issue"
~~~~~~~~~~~

At the end of August 2012, a post-secondary school in Alberta, Canada
migrated its infrastructure to an OpenStack cloud. As luck would have
it, within the first day or two of it running, one of their servers just
disappeared from the network. Blip. Gone.

After restarting the instance, everything was back up and running. We
reviewed the logs and saw that at some point, network communication
stopped and then everything went idle. We chalked this up to a random
occurrence.

A few nights later, it happened again.

We reviewed both sets of logs. The one thing that stood out the most was
DHCP. At the time, OpenStack, by default, set DHCP leases for one minute
(it's now two minutes). This means that every instance contacts the
cloud controller (DHCP server) to renew its fixed IP. For some reason,
this instance could not renew its IP. We correlated the instance's logs
with the logs on the cloud controller and put together a conversation:

#. Instance tries to renew IP.

#. Cloud controller receives the renewal request and sends a response.

#. Instance "ignores" the response and re-sends the renewal request.

#. Cloud controller receives the second request and sends a new
   response.

#. Instance begins sending a renewal request to ``255.255.255.255``
   since it hasn't heard back from the cloud controller.

#. The cloud controller receives the ``255.255.255.255`` request and
   sends a third response.

#. The instance finally gives up.

With this information in hand, we were sure that the problem had to do
with DHCP. We thought that for some reason, the instance wasn't getting
a new IP address, and with no IP, it shut itself off from the network.

A quick Google search turned up this: `DHCP lease errors in VLAN
mode <https://lists.launchpad.net/openstack/msg11696.html>`_,
which further supported our DHCP theory.

An initial idea was to just increase the lease time. If the instance
only renewed once every week, the chances of this problem happening
would be tremendously smaller than every minute. This didn't solve the
problem, though. It was just covering the problem up.

We decided to have ``tcpdump`` run on this instance and see if we could
catch it in action again. Sure enough, we did.
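
If you ever need to watch a DHCP conversation like this one, a capture filter
on the DHCP ports keeps the noise down. A sketch; the interface name is a
placeholder:

.. code-block:: console

   # DHCP runs over UDP ports 67 (server) and 68 (client);
   # -v makes tcpdump decode the DHCP message types.
   $ tcpdump -i eth0 -nn -v udp port 67 or udp port 68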

The ``tcpdump`` looked very, very weird. In short, it looked as though
network communication stopped before the instance tried to renew its IP.
Since there is so much DHCP chatter from a one-minute lease, it's very
hard to confirm it, but even with only milliseconds difference between
packets, if one packet arrives first, it arrived first, and if that
packet reported network issues, then it had to have happened before
DHCP.

Additionally, the instance in question was responsible for a very, very
large backup job each night. While "The Issue" (as we were now calling
it) didn't happen exactly when the backup happened, it was close enough
(a few hours) that we couldn't ignore it.

Further days went by and we caught The Issue in action more and more. We
found that dhclient was not running after The Issue happened. Now we were
back to thinking it was a DHCP issue. Running
``/etc/init.d/networking restart`` brought everything back up and running.
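
Confirming that dhclient had died was as simple as looking for the process
and for its last lease renewal in the system log. Paths vary by distribution;
this is only a sketch:

.. code-block:: console

   # No output here means dhclient is no longer running.
   $ ps aux | grep [d]hclient

   # The most recent DHCPREQUEST/DHCPACK lines show when the lease was
   # last successfully renewed.
   $ grep -i dhclient /var/log/syslog | tail -n 5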

Ever have one of those days where all of a sudden you get the Google
results you were looking for? Well, that's what happened here. I was
looking for information on dhclient and why it dies when it can't renew
its lease, and all of a sudden I found a bunch of OpenStack and dnsmasq
discussions that were identical to the problem we were seeing!

`Problem with Heavy Network IO and
Dnsmasq <http://www.gossamer-threads.com/lists/openstack/operators/18197>`_.

`instances losing IP address while running, due to No
DHCPOFFER <http://www.gossamer-threads.com/lists/openstack/dev/14696>`_.

Seriously, Google.

This bug report was the key to everything: `KVM images lose connectivity
with bridged
network <https://bugs.launchpad.net/ubuntu/+source/qemu-kvm/+bug/997978>`_.

It was funny to read the report. It was full of people who had some
strange network problem but didn't quite explain it in the same way.

So it was a qemu/kvm bug.

At the same time as finding the bug report, a co-worker was able to
successfully reproduce The Issue! How? He used ``iperf`` to spew a ton
of bandwidth at an instance. Within 30 minutes, the instance just
disappeared from the network.

Armed with a patched qemu and a way to reproduce, we set out to see if
we'd finally solved The Issue. After 48 hours straight of hammering the
instance with bandwidth, we were confident. The rest is history. You can
search the bug report for "joe" to find my comments and actual tests.

Disappearing Images
~~~~~~~~~~~~~~~~~~~

At the end of 2012, Cybera (a nonprofit with a mandate to oversee the
development of cyberinfrastructure in Alberta, Canada) deployed an
updated OpenStack cloud for their `DAIR
project <http://www.canarie.ca/cloud/>`_. A few days into
production, a compute node locked up. Upon rebooting the node, I checked
to see what instances were hosted on that node so I could boot them on
behalf of the customer. Luckily, only one instance.

The :command:`nova reboot` command wasn't working, so I used :command:`virsh`,
but it immediately came back with an error saying it was unable to find the
backing disk. In this case, the backing disk is the Glance image that is
copied to ``/var/lib/nova/instances/_base`` when the image is used for
the first time. Why couldn't it find it? I checked the directory and
sure enough it was gone.

I reviewed the ``nova`` database and saw the instance's entry in the
``nova.instances`` table. The image that the instance was using matched
what virsh was reporting, so no inconsistency there.

I checked Glance and noticed that this image was a snapshot that the
user created. At least that was good news—this user would have been the
only user affected.

Finally, I checked StackTach and reviewed the user's events. They had
created and deleted several snapshots—most likely experimenting.
Although the timestamps didn't match up, my conclusion was that they
launched their instance and then deleted the snapshot and it was somehow
removed from ``/var/lib/nova/instances/_base``. None of that made sense,
but it was the best I could come up with.

It turns out the reason that this compute node locked up was a hardware
issue. We removed it from the DAIR cloud and called Dell to have it
serviced. Dell arrived and began working. Somehow or another (or a fat
finger), a different compute node was bumped and rebooted. Great.

When this node fully booted, I ran through the same scenario of seeing
what instances were running so I could turn them back on. There were a
total of four. Three booted and one gave an error. It was the same error
as before: unable to find the backing disk. Seriously, what?

Again, it turns out that the image was a snapshot. The three other
instances that successfully started were standard cloud images. Was it a
problem with snapshots? That didn't make sense.

A note about DAIR's architecture: ``/var/lib/nova/instances`` is a
shared NFS mount. This means that all compute nodes have access to it,
which includes the ``_base`` directory. Another centralized area is
``/var/log/rsyslog`` on the cloud controller. This directory collects
all OpenStack logs from all compute nodes. I wondered if there were any
entries for the file that :command:`virsh` was reporting:

.. code-block:: console

   dair-ua-c03/nova.log:Dec 19 12:10:59 dair-ua-c03
   2012-12-19 12:10:59 INFO nova.virt.libvirt.imagecache
   [-] Removing base file:
   /var/lib/nova/instances/_base/7b4783508212f5d242cbf9ff56fb8d33b4ce6166_10

Ah-hah! So OpenStack was deleting it. But why?

A feature was introduced in Essex to periodically check and see if there
were any ``_base`` files not in use. If there were, OpenStack Compute
would delete them. This idea sounds innocent enough and has some good
qualities to it. But how did this feature end up turned on? It was
disabled by default in Essex, as it should be. It was `decided to be
turned on in Folsom <https://bugs.launchpad.net/nova/+bug/1029674>`_.
I cannot emphasize enough that:

*Actions which delete things should not be enabled by default.*

Disk space is cheap these days. Data recovery is not.

Secondly, DAIR's shared ``/var/lib/nova/instances`` directory
contributed to the problem. Since all compute nodes have access to this
directory, all compute nodes periodically review the ``_base`` directory.
If there is only one instance using an image, and the node that the
instance is on is down for a few minutes, it won't be able to mark the
image as still in use. Therefore, the image seems like it's not in use
and is deleted. When the compute node comes back online, the instance
hosted on that node is unable to start.
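
If you want to check whether this cleanup is active on your own deployment,
the relevant settings live in ``nova.conf``. The option names below are the
libvirt image cache options as I understand them, so treat this as a starting
point rather than gospel:

.. code-block:: console

   # If remove_unused_base_images is true, unused _base files older than the
   # configured minimum age are deleted by the periodic image cache manager.
   $ grep -E 'remove_unused_(base_images|original_minimum_age_seconds)' /etc/nova/nova.conf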

The Valentine's Day Compute Node Massacre
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Although the title of this story is much more dramatic than the actual
event, I don't think, or hope, that I'll have the opportunity to use
"Valentine's Day Massacre" again in a title.

This past Valentine's Day, I received an alert that a compute node was
no longer available in the cloud—meaning,

.. code-block:: console

   $ openstack compute service list

showed this particular node in a down state.

I logged into the cloud controller and was able to both ``ping`` and SSH
into the problematic compute node, which seemed very odd. Usually if I
receive this type of alert, the compute node has totally locked up and
would be inaccessible.

After a few minutes of troubleshooting, I saw the following details:

- A user recently tried launching a CentOS instance on that node

- This user was the only user on the node (new node)

- The load shot up to 8 right before I received the alert

- The bonded 10gb network device (bond0) was in a DOWN state

- The 1gb NIC was still alive and active

I looked at the status of both NICs in the bonded pair and saw that
neither was able to communicate with the switch port. Seeing as how each
NIC in the bond is connected to a separate switch, I thought that the
chance of a switch port dying on each switch at the same time was quite
improbable. I concluded that the 10gb dual port NIC had died and needed
to be replaced. I created a ticket for the hardware support department
at the data center where the node was hosted. I felt lucky that this was
a new node and no one else was hosted on it yet.
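
For the record, the state of a bonded pair and its slaves can be inspected
directly from the node; the device names here are placeholders:

.. code-block:: console

   # Shows the bonding mode, the state of each slave NIC, and link failures.
   $ cat /proc/net/bonding/bond0

   # Confirms whether the kernel sees a link on an individual slave.
   $ ethtool eth2 | grep "Link detected"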

An hour later I received the same alert, but for another compute node.
Crap. OK, now there's definitely a problem going on. Just like the
original node, I was able to log in by SSH. The bond0 NIC was DOWN but
the 1gb NIC was active.

And the best part: the same user had just tried creating a CentOS
instance. What?

I was totally confused at this point, so I texted our network admin to
see if he was available to help. He logged in to both switches and
immediately saw the problem: the switches detected spanning tree packets
coming from the two compute nodes and immediately shut the ports down to
prevent spanning tree loops:

.. code-block:: console

   Feb 15 01:40:18 SW-1 Stp: %SPANTREE-4-BLOCK_BPDUGUARD: Received BPDU packet on Port-Channel35 with BPDU guard enabled. Disabling interface. (source mac fa:16:3e:24:e7:22)
   Feb 15 01:40:18 SW-1 Ebra: %ETH-4-ERRDISABLE: bpduguard error detected on Port-Channel35.
   Feb 15 01:40:18 SW-1 Mlag: %MLAG-4-INTF_INACTIVE_LOCAL: Local interface Port-Channel35 is link down. MLAG 35 is inactive.
   Feb 15 01:40:18 SW-1 Ebra: %LINEPROTO-5-UPDOWN: Line protocol on Interface Port-Channel35 (Server35), changed state to down
   Feb 15 01:40:19 SW-1 Stp: %SPANTREE-6-INTERFACE_DEL: Interface Port-Channel35 has been removed from instance MST0
   Feb 15 01:40:19 SW-1 Ebra: %LINEPROTO-5-UPDOWN: Line protocol on Interface Ethernet35 (Server35), changed state to down

He re-enabled the switch ports and the two compute nodes immediately
came back to life.

Unfortunately, this story has an open ending... we're still looking into
why the CentOS image was sending out spanning tree packets. Further,
we're researching a proper way to prevent this from happening.
It's a bigger issue than one might think. While it's extremely important
for switches to prevent spanning tree loops, it's very problematic to
have an entire compute node be cut from the network when this happens.
If a compute node is hosting 100 instances and one of them sends a
spanning tree packet, that instance has effectively DDOS'd the other 99
instances.

This is an ongoing and hot topic in networking circles, especially with
the rise of virtualization and virtual switches.

Down the Rabbit Hole
~~~~~~~~~~~~~~~~~~~~

Users being able to retrieve console logs from running instances is a
boon for support—many times they can figure out what's going on inside
their instance and fix it without bothering you.
Unfortunately, sometimes overzealous logging of failures can cause
problems of its own.

A report came in: VMs were launching slowly, or not at all. Cue the
standard checks—nothing on Nagios, but there was a spike in network
traffic towards the current master of our RabbitMQ cluster. Investigation
started, but soon the other parts of the queue cluster were leaking
memory like a sieve. Then the alert came in—the master Rabbit server
went down and connections failed over to the slave.

At that time, our control services were hosted by another team and we
didn't have much debugging information to determine what was going on
with the master, and we could not reboot it. That team noted that it
failed without alert, but managed to reboot it. After an hour, the
cluster had returned to its normal state and we went home for the day.

Continuing the diagnosis the next morning was kick-started by another
identical failure. We quickly got the message queue running again, and
tried to work out why Rabbit was suffering from so much network traffic.
Enabling debug logging on nova-api quickly brought understanding. A
``tail -f /var/log/nova/nova-api.log`` was scrolling by faster
than we'd ever seen before. CTRL+C on that and we could plainly see the
contents of a system log spewing failures over and over again: a system
log from one of our users' instances.

After finding the instance ID we headed over to
``/var/lib/nova/instances`` to find the ``console.log``:

.. code-block:: console

   adm@cc12:/var/lib/nova/instances/instance-00000e05# wc -l console.log
   92890453 console.log
   adm@cc12:/var/lib/nova/instances/instance-00000e05# ls -sh console.log
   5.5G console.log

Sure enough, the user had been periodically refreshing the console log
page on the dashboard and the 5.5 GB file was traversing the Rabbit cluster
to get to the dashboard.

We called them and asked them to stop for a while, and they were happy
to abandon the horribly broken VM. After that, we started monitoring the
size of console logs.
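
Monitoring for this is straightforward to script; a periodic sweep for
oversized console logs is one simple approach. The threshold and path are
only examples:

.. code-block:: console

   # List any instance console log larger than 1 GB.
   $ find /var/lib/nova/instances -name console.log -size +1G -exec ls -sh {} \;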

To this day, `the issue <https://bugs.launchpad.net/nova/+bug/832507>`__
doesn't have a permanent resolution, but we look forward to the discussion
at the next summit.

Havana Haunted by the Dead
~~~~~~~~~~~~~~~~~~~~~~~~~~

Felix Lee of Academia Sinica Grid Computing Centre in Taiwan contributed
this story.

I just upgraded OpenStack from Grizzly to Havana 2013.2-2 using the RDO
repository and everything was running pretty well—except the EC2 API.

I noticed that the API would suffer from a heavy load and respond slowly
to particular EC2 requests such as ``RunInstances``.

Output from ``/var/log/nova/nova-api.log`` on :term:`Havana`:

.. code-block:: console

   2014-01-10 09:11:45.072 129745 INFO nova.ec2.wsgi.server
   [req-84d16d16-3808-426b-b7af-3b90a11b83b0
   0c6e7dba03c24c6a9bce299747499e8a 7052bd6714e7460caeb16242e68124f9]
   117.103.103.29 "GET
   /services/Cloud?AWSAccessKeyId=[something]&Action=RunInstances&ClientToken=[something]&ImageId=ami-00000001&InstanceInitiatedShutdownBehavior=terminate...
   HTTP/1.1" status: 200 len: 1109 time: 138.5970151

This request took over two minutes to process, but executed quickly on
another co-existing Grizzly deployment using the same hardware and
system configuration.

Output from ``/var/log/nova/nova-api.log`` on :term:`Grizzly`:

.. code-block:: console

   2014-01-08 11:15:15.704 INFO nova.ec2.wsgi.server
   [req-ccac9790-3357-4aa8-84bd-cdaab1aa394e
   ebbd729575cb404081a45c9ada0849b7 8175953c209044358ab5e0ec19d52c37]
   117.103.103.29 "GET
   /services/Cloud?AWSAccessKeyId=[something]&Action=RunInstances&ClientToken=[something]&ImageId=ami-00000007&InstanceInitiatedShutdownBehavior=terminate...
   HTTP/1.1" status: 200 len: 931 time: 3.9426181

While monitoring system resources, I noticed a significant increase in
memory consumption while the EC2 API processed this request. I thought
it wasn't handling memory properly—possibly not releasing memory. If the
API received several of these requests, memory consumption quickly grew
until the system ran out of RAM and began using swap. Each node has 48
GB of RAM and the "nova-api" process would consume all of it within
minutes. Once this happened, the entire system would become unusably
slow until I restarted the nova-api service.

So, I found myself wondering what changed in the EC2 API on Havana that
might cause this to happen. Was it a bug or normal behavior that I now
needed to work around?

After digging into the nova (OpenStack Compute) code, I noticed two
areas in ``api/ec2/cloud.py`` potentially impacting my system:

.. code-block:: python

   instances = self.compute_api.get_all(context,
                                        search_opts=search_opts,
                                        sort_dir='asc')

   sys_metas = self.compute_api.get_all_system_metadata(
       context, search_filts=[{'key': ['EC2_client_token']},
                              {'value': [client_token]}])

Since my database contained many records—over 1 million metadata records
and over 300,000 instance records in "deleted" or "errored" states—each
search took a long time. I decided to clean up the database by first
archiving a copy for backup and then performing some deletions using the
MySQL client. For example, I ran the following SQL command to remove
rows of instances deleted for over a year:

.. code-block:: console

   mysql> delete from nova.instances where deleted=1 and terminated_at < (NOW() - INTERVAL 1 YEAR);

Performance increased greatly after deleting the old records, and my new
deployment continues to behave well.
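
For the backup step mentioned above, a plain ``mysqldump`` of the nova
database taken before any deletions is the simplest safety net; credentials
and file names are placeholders:

.. code-block:: console

   # --single-transaction gives a consistent snapshot of InnoDB tables
   # without locking out the running services while the dump is taken.
   $ mysqldump --single-transaction -u root -p nova > nova-before-cleanup.sql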

@@ -1,62 +0,0 @@
=========
Resources
=========

OpenStack
~~~~~~~~~

- `OpenStack Installation Tutorial for openSUSE and SUSE Linux Enterprise
  Server <https://docs.openstack.org/ocata/install-guide-obs/>`_

- `OpenStack Installation Tutorial for Red Hat Enterprise Linux and CentOS
  <https://docs.openstack.org/ocata/install-guide-rdo/>`_

- `OpenStack Installation Tutorial for Ubuntu
  Server <https://docs.openstack.org/ocata/install-guide-ubuntu/>`_

- `OpenStack Administrator Guide <https://docs.openstack.org/admin-guide/>`_

- `OpenStack Cloud Computing Cookbook (Packt
  Publishing) <http://www.packtpub.com/openstack-cloud-computing-cookbook-second-edition/book>`_

Cloud (General)
~~~~~~~~~~~~~~~

- `The NIST Definition of Cloud
  Computing <http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-145.pdf>`_

Python
~~~~~~

- `Dive Into Python (Apress) <http://www.diveintopython.net/>`_

Networking
~~~~~~~~~~

- `TCP/IP Illustrated, Volume 1: The Protocols, 2/E
  (Pearson) <http://www.pearsonhighered.com/educator/product/TCPIP-Illustrated-Volume-1-The-Protocols/9780321336316.page>`_

- `The TCP/IP Guide (No Starch
  Press) <http://www.nostarch.com/tcpip.htm>`_

- `A tcpdump Tutorial and
  Primer <http://danielmiessler.com/study/tcpdump/>`_

Systems Administration
~~~~~~~~~~~~~~~~~~~~~~

- `UNIX and Linux Systems Administration Handbook (Prentice
  Hall) <http://www.admin.com/>`_

Virtualization
~~~~~~~~~~~~~~

- `The Book of Xen (No Starch
  Press) <http://www.nostarch.com/xen.htm>`_

Configuration Management
~~~~~~~~~~~~~~~~~~~~~~~~

- `Puppet Labs Documentation <http://docs.puppetlabs.com/>`_

- `Pro Puppet (Apress) <http://www.apress.com/9781430230571>`_

@@ -1,435 +0,0 @@
=====================
Working with Roadmaps
=====================

The good news: OpenStack has unprecedented transparency when it comes to
providing information about what's coming up. The bad news: each release
moves very quickly. The purpose of this appendix is to highlight some of
the useful pages to track, and take an educated guess at what is coming
up in the next release and perhaps further afield.

OpenStack follows a six-month release cycle, typically releasing in
April/May and October/November each year. At the start of each cycle,
the community gathers in a single location for a design summit. At the
summit, the features for the coming releases are discussed, prioritized,
and planned. The figure below shows an example release cycle, with dates
showing milestone releases, code freeze, and string freeze dates, along
with an example of when the summit occurs. Milestones are interim releases
within the cycle that are available as packages for download and
testing. Code freeze is putting a stop to adding new features to the
release. String freeze is putting a stop to changing any strings within
the source code.

.. image:: figures/osog_ac01.png
   :width: 100%

Information Available to You
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

There are several good sources of information available that you can use
to track your OpenStack development desires.

Release notes are maintained on the OpenStack wiki, and also shown here:

.. list-table::
   :widths: 25 25 25 25
   :header-rows: 1

   * - Series
     - Status
     - Releases
     - Date
   * - Liberty
     - `Under Development
       <https://wiki.openstack.org/wiki/Liberty_Release_Schedule>`_
     - 2015.2
     - Oct, 2015
   * - Kilo
     - `Current stable release, security-supported
       <https://wiki.openstack.org/wiki/Kilo_Release_Schedule>`_
     - `2015.1 <https://wiki.openstack.org/wiki/ReleaseNotes/Kilo>`_
     - Apr 30, 2015
   * - Juno
     - `Security-supported
       <https://wiki.openstack.org/wiki/Juno_Release_Schedule>`_
     - `2014.2 <https://wiki.openstack.org/wiki/ReleaseNotes/Juno>`_
     - Oct 16, 2014
   * - Icehouse
     - `End-of-life
       <https://wiki.openstack.org/wiki/Icehouse_Release_Schedule>`_
     - `2014.1 <https://wiki.openstack.org/wiki/ReleaseNotes/Icehouse>`_
     - Apr 17, 2014
   * -
     -
     - `2014.1.1 <https://wiki.openstack.org/wiki/ReleaseNotes/2014.1.1>`_
     - Jun 9, 2014
   * -
     -
     - `2014.1.2 <https://wiki.openstack.org/wiki/ReleaseNotes/2014.1.2>`_
     - Aug 8, 2014
   * -
     -
     - `2014.1.3 <https://wiki.openstack.org/wiki/ReleaseNotes/2014.1.3>`_
     - Oct 2, 2014
   * - Havana
     - End-of-life
     - `2013.2 <https://wiki.openstack.org/wiki/ReleaseNotes/Havana>`_
     - Oct 17, 2013
   * -
     -
     - `2013.2.1 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.2.1>`_
     - Dec 16, 2013
   * -
     -
     - `2013.2.2 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.2.2>`_
     - Feb 13, 2014
   * -
     -
     - `2013.2.3 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.2.3>`_
     - Apr 3, 2014
   * -
     -
     - `2013.2.4 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.2.4>`_
     - Sep 22, 2014
   * - Grizzly
     - End-of-life
     - `2013.1 <https://wiki.openstack.org/wiki/ReleaseNotes/Grizzly>`_
     - Apr 4, 2013
   * -
     -
     - `2013.1.1 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.1.1>`_
     - May 9, 2013
   * -
     -
     - `2013.1.2 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.1.2>`_
     - Jun 6, 2013
   * -
     -
     - `2013.1.3 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.1.3>`_
     - Aug 8, 2013
   * -
     -
     - `2013.1.4 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.1.4>`_
     - Oct 17, 2013
   * -
     -
     - `2013.1.5 <https://wiki.openstack.org/wiki/ReleaseNotes/2013.1.5>`_
     - Mar 20, 2015
   * - Folsom
     - End-of-life
     - `2012.2 <https://wiki.openstack.org/wiki/ReleaseNotes/Folsom>`_
     - Sep 27, 2012
   * -
     -
     - `2012.2.1 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.2.1>`_
     - Nov 29, 2012
   * -
     -
     - `2012.2.2 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.2.2>`_
     - Dec 13, 2012
   * -
     -
     - `2012.2.3 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.2.3>`_
     - Jan 31, 2013
   * -
     -
     - `2012.2.4 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.2.4>`_
     - Apr 11, 2013
   * - Essex
     - End-of-life
     - `2012.1 <https://wiki.openstack.org/wiki/ReleaseNotes/Essex>`_
     - Apr 5, 2012
   * -
     -
     - `2012.1.1 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.1.1>`_
     - Jun 22, 2012
   * -
     -
     - `2012.1.2 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.1.2>`_
     - Aug 10, 2012
   * -
     -
     - `2012.1.3 <https://wiki.openstack.org/wiki/ReleaseNotes/2012.1.3>`_
     - Oct 12, 2012
   * - Diablo
     - Deprecated
     - `2011.3 <https://wiki.openstack.org/wiki/ReleaseNotes/Diablo>`_
     - Sep 22, 2011
   * -
     -
     - `2011.3.1 <https://wiki.openstack.org/wiki/ReleaseNotes/2011.3.1>`_
     - Jan 19, 2012
   * - Cactus
     - Deprecated
     - `2011.2 <https://wiki.openstack.org/wiki/ReleaseNotes/Cactus>`_
     - Apr 15, 2011
   * - Bexar
     - Deprecated
     - `2011.1 <https://wiki.openstack.org/wiki/ReleaseNotes/Bexar>`_
     - Feb 3, 2011
   * - Austin
     - Deprecated
     - `2010.1 <https://wiki.openstack.org/wiki/ReleaseNotes/Austin>`_
     - Oct 21, 2010

Here are some other resources:

- `A breakdown of current features under development, with their target
  milestone <https://status.openstack.org/release/>`_

- `A list of all features, including those not yet under
  development <https://blueprints.launchpad.net/openstack>`_

- `Rough-draft design discussions ("etherpads") from the last design
  summit <https://wiki.openstack.org/wiki/Summit/Kilo/Etherpads>`_

- `List of individual code changes under
  review <https://review.openstack.org/>`_

Influencing the Roadmap
~~~~~~~~~~~~~~~~~~~~~~~

OpenStack truly welcomes your ideas (and contributions) and highly
values feedback from real-world users of the software. By learning a
little about the process that drives feature development, you can
participate and perhaps get the additions you desire.

Feature requests typically start their life in Etherpad, a collaborative
editing tool, which is used to take coordinating notes at a design
summit session specific to the feature. This then leads to the creation
of a blueprint on the Launchpad site for the particular project, which
is used to describe the feature more formally. Blueprints are then
approved by project team members, and development can begin.

Therefore, the fastest way to get your feature request up for
consideration is to create an Etherpad with your ideas and propose a
session to the design summit. If the design summit has already passed,
you may also create a blueprint directly. Read this `blog post about how
to work with blueprints
<http://vmartinezdelacruz.com/how-to-work-with-blueprints-without-losing-your-mind/>`_
from the perspective of Victoria Martínez, a developer intern.

The roadmap for the next release as it is developed can be seen at
`Releases <https://releases.openstack.org>`_.

To determine the potential features going in to future releases, or to
look at features implemented previously, take a look at the existing
blueprints such as `OpenStack Compute (nova)
Blueprints <https://blueprints.launchpad.net/nova>`_, `OpenStack
Identity (keystone)
Blueprints <https://blueprints.launchpad.net/keystone>`_, and release
notes.

Aside from the direct-to-blueprint pathway, there is another very
well-regarded mechanism to influence the development roadmap:
the user survey. Found at `OpenStack User Survey
<https://www.openstack.org/user-survey/>`_,
it allows you to provide details of your deployments and needs, anonymously by
default. Each cycle, the user committee analyzes the results and produces a
report, including providing specific information to the technical
committee and project team leads.

Aspects to Watch
~~~~~~~~~~~~~~~~

You want to keep an eye on the areas improving within OpenStack. The
best way to "watch" roadmaps for each project is to look at the
blueprints that are being approved for work on milestone releases. You
can also learn from PTL webinars that follow the OpenStack summits twice
a year.

Driver Quality Improvements
---------------------------

A major quality push has occurred across drivers and plug-ins in Block
Storage, Compute, and Networking. Particularly, developers of Compute
and Networking drivers that require proprietary or hardware products are
now required to provide an automated external testing system for use
during the development process.

Easier Upgrades
---------------

One of the most requested features since OpenStack began (for components
other than Object Storage, which tends to "just work"): easier upgrades.
In all recent releases, internal messaging communication is versioned,
meaning services can theoretically drop back to backward-compatible
behavior. This allows you to run later versions of some components,
while keeping older versions of others.

In addition, database migrations are now tested with the Turbo Hipster
tool. This tool tests database migration performance on copies of
real-world user databases.

These changes have facilitated the first proper OpenStack upgrade guide,
found in :doc:`ops-upgrades`, and will continue to improve in the next
release.

Deprecation of Nova Network
---------------------------

With the introduction of the full software-defined networking stack
provided by OpenStack Networking (neutron) in the Folsom release,
development effort on the initial networking code that remains part of
the Compute component has gradually lessened. While many still use
``nova-network`` in production, there has been a long-term plan to
remove the code in favor of the more flexible and full-featured
OpenStack Networking.

An attempt was made to deprecate ``nova-network`` during the Havana
release, which was aborted due to the lack of equivalent functionality
(such as the FlatDHCP multi-host high-availability mode mentioned in
this guide), the lack of a migration path between versions, insufficient
testing, and the simplicity ``nova-network`` offers for the more
straightforward use cases it traditionally supported. Though significant
effort has been made to address these concerns, ``nova-network`` was not
deprecated in the Juno release. In addition, to a limited degree,
patches to ``nova-network`` have again begun to be accepted, such as
adding a per-network settings feature and SR-IOV support in Juno.

This leaves you with an important point of decision when designing your
cloud. OpenStack Networking is robust enough to use with a small number
of limitations (performance issues in some scenarios, only basic high
availability of layer 3 systems) and provides many more features than
``nova-network``. However, if you do not have the more complex use cases
that can benefit from fuller software-defined networking capabilities,
or are uncomfortable with the new concepts introduced, ``nova-network``
may continue to be a viable option for the next 12 months.

Similarly, if you have an existing cloud and are looking to upgrade from
``nova-network`` to OpenStack Networking, you should have the option to
delay the upgrade for this period of time. However, each release of
OpenStack brings significant new innovation, and regardless of your use
of networking methodology, it is likely best to begin planning for an
upgrade within a reasonable timeframe of each release.

As mentioned, there's currently no way to cleanly migrate from
``nova-network`` to neutron. We recommend that you keep in mind what
such a migration would involve, so that you are ready when a proper
migration path is released.

Distributed Virtual Router
~~~~~~~~~~~~~~~~~~~~~~~~~~

One of the long-time complaints surrounding OpenStack Networking was the
lack of high availability for the layer 3 components. The Juno release
introduced Distributed Virtual Router (DVR), which aims to solve this
problem.

Early indications are that it does do this well for a base set of
scenarios, such as using the ML2 plug-in with Open vSwitch, one flat
external network and VXLAN tenant networks. However, it does appear that
there are problems with the use of VLANs, IPv6, Floating IPs, high
north-south traffic scenarios and large numbers of compute nodes. It is
expected these will improve significantly with the next release, but bug
reports on specific issues are highly desirable.

Replacement of Open vSwitch Plug-in with Modular Layer 2
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The Modular Layer 2 plug-in is a framework allowing OpenStack Networking
to simultaneously utilize the variety of layer-2 networking technologies
found in complex real-world data centers. It currently works with the
existing Open vSwitch, Linux Bridge, and Hyper-V L2 agents and is
intended to replace and deprecate the monolithic plug-ins associated
with those L2 agents.

New API Versions
~~~~~~~~~~~~~~~~

The third version of the Compute API was broadly discussed and worked on
during the Havana and Icehouse release cycles. Current discussions
indicate that the V2 API will remain for many releases, and the next
iteration of the API will be denoted v2.1 and have similar properties to
the existing v2.0, rather than an entirely new v3 API. This is a great
time to evaluate all APIs and provide comments while the next generation
APIs are being defined. A new working group was formed specifically to
`improve OpenStack APIs <https://wiki.openstack.org/wiki/API_Working_Group>`_
and create design guidelines, which you are welcome to join.

OpenStack on OpenStack (TripleO)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This project continues to improve and you may consider using it for
greenfield deployments, though according to the latest user survey
results it has yet to see widespread uptake.

Data processing service for OpenStack (sahara)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In answer to the much-requested big data capability, a dedicated team
has been making solid progress on a Hadoop-as-a-Service project.

Bare metal Deployment (ironic)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The bare-metal deployment project has been widely lauded, and
development continues. The Juno release brought the OpenStack Bare metal
driver into the Compute project, and the aim was to deprecate the
existing bare-metal driver in Kilo. If you are a current user of the
bare metal driver, a particular blueprint to follow is `Deprecate the
bare metal driver
<https://blueprints.launchpad.net/nova/+spec/deprecate-baremetal-driver>`_.

Database as a Service (trove)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The OpenStack community has had a database-as-a-service tool in
development for some time, and we saw the first integrated release of it
in Icehouse. From its release it was able to deploy database servers out
of the box in a highly available way, initially supporting only MySQL.
Juno introduced support for Mongo (including clustering), PostgreSQL and
Couchbase, in addition to replication functionality for MySQL. In Kilo,
more advanced clustering capability was delivered, in addition to better
integration with other OpenStack components such as Networking.

Message Service (zaqar)
~~~~~~~~~~~~~~~~~~~~~~~

A service to provide queues of messages and notifications was released.

DNS service (designate)
~~~~~~~~~~~~~~~~~~~~~~~

A long-requested service to provide the ability to manipulate DNS
entries associated with OpenStack resources has gathered a following.
The designate project was also released.

Scheduler Improvements
~~~~~~~~~~~~~~~~~~~~~~

Both Compute and Block Storage rely on schedulers to determine where to
place virtual machines or volumes. In Havana, the Compute scheduler
underwent significant improvement, while in Icehouse it was the
scheduler in Block Storage that received a boost. Further down the
track, an effort that started this cycle to create a holistic scheduler
covering both will come to fruition. Some of the work that was
done in Kilo can be found under the `Gantt
project <https://wiki.openstack.org/wiki/Gantt/kilo>`_.

Block Storage Improvements
--------------------------

Block Storage is considered a stable project, with wide uptake and a
long track record of quality drivers. The team has discussed many areas
of work at the summits, including better error reporting, automated
discovery, and thin provisioning features.

Toward a Python SDK
-------------------

Though many successfully use the various python-\*client code as an
effective SDK for interacting with OpenStack, consistency between the
projects and documentation availability waxes and wanes. To combat this,
an `effort to improve the
experience <https://wiki.openstack.org/wiki/PythonOpenStackSDK>`_ has
started. Cross-project development efforts in OpenStack have a checkered
history, such as the `unified client
project <https://wiki.openstack.org/wiki/OpenStackClient>`_ having
several false starts. However, the early signs for the SDK project are
promising, and we expect to see results during the Juno cycle.
@ -1,192 +0,0 @@
=========
Use Cases
=========

This appendix contains a small selection of use cases from the
community, with more technical detail than usual. Further examples can
be found on the `OpenStack website <https://www.openstack.org/user-stories/>`_.

NeCTAR
~~~~~~

Who uses it: researchers from the Australian publicly funded research
sector. Use is across a wide variety of disciplines, with the purpose of
instances ranging from running simple web servers to using hundreds of
cores for high-throughput computing.

Deployment
----------

Using OpenStack Compute cells, the NeCTAR Research Cloud spans eight
sites with approximately 4,000 cores per site.

Each site runs a different configuration, as resource cells in an
OpenStack Compute cells setup. Some sites span multiple data centers,
some use off-compute-node storage with a shared file system, and some
use on-compute-node storage with a non-shared file system. Each site
deploys the Image service with an Object Storage back end. A central
Identity, dashboard, and Compute API service are used. A login to the
dashboard triggers a SAML login with Shibboleth, which creates an
account in the Identity service with an SQL back end. An Object Storage
Global Cluster is used across several sites.

Compute nodes have 24 to 48 cores, with at least 4 GB of RAM per core
and approximately 40 GB of ephemeral storage per core.

All sites are based on Ubuntu 14.04, with KVM as the hypervisor. The
OpenStack version in use is typically the current stable version, with 5
to 10 percent back-ported code from trunk and modifications.

Resources
---------

- `OpenStack.org case
  study <https://www.openstack.org/user-stories/nectar/>`_

- `NeCTAR-RC GitHub <https://github.com/NeCTAR-RC/>`_

- `NeCTAR website <https://www.nectar.org.au/>`_

MIT CSAIL
~~~~~~~~~

Who uses it: researchers from the MIT Computer Science and Artificial
Intelligence Lab.

Deployment
----------

The CSAIL cloud is currently 64 physical nodes with a total of 768
physical cores and 3,456 GB of RAM. Persistent data storage is largely
outside the cloud on NFS, with cloud resources focused on compute
resources. There are more than 130 users in more than 40 projects,
typically running 2,000–2,500 vCPUs in 300 to 400 instances.

We initially deployed on Ubuntu 12.04 with the Essex release of
OpenStack using FlatDHCP multi-host networking.

The software stack is still Ubuntu 12.04 LTS, but now with OpenStack
Havana from the Ubuntu Cloud Archive. KVM is the hypervisor, deployed
using `FAI <http://fai-project.org/>`_ and Puppet for configuration
management. The FAI and Puppet combination is used lab-wide, not only
for OpenStack. There is a single cloud controller node, which also acts
as network controller, with the remainder of the server hardware
dedicated to compute nodes.

Host aggregates and instance-type extra specs are used to provide two
different resource allocation ratios. The default resource allocation
ratios we use are 4:1 CPU and 1.5:1 RAM. Compute-intensive workloads use
instance types that require non-oversubscribed hosts where ``cpu_ratio``
and ``ram_ratio`` are both set to 1.0. Since we have hyper-threading
enabled on our compute nodes, this provides one vCPU per CPU thread, or
two vCPUs per physical core.
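
The general pattern for this (a generic sketch, not CSAIL's exact
configuration; the aggregate name, metadata key, and flavor below are
invented for illustration, and the flavor is assumed to already exist) is
to group hosts into an aggregate and tie flavors to it with extra specs:

.. code-block:: console

   $ nova aggregate-create dedicated-hosts
   $ nova aggregate-set-metadata dedicated-hosts allocation=dedicated
   $ nova aggregate-add-host dedicated-hosts compute-12
   $ nova flavor-key m1.dedicated set aggregate_instance_extra_specs:allocation=dedicated

With the ``AggregateInstanceExtraSpecsFilter`` scheduler filter enabled,
instances of the ``m1.dedicated`` flavor are scheduled only onto hosts in
that aggregate.
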
With our upgrade to Grizzly in August 2013, we moved to OpenStack
Networking, neutron (quantum at the time). Compute nodes have
two gigabit network interfaces and a separate management card for IPMI
management. One network interface is used for node-to-node
communications. The other is used as a trunk port for OpenStack managed
VLANs. The controller node uses two bonded 10g network interfaces for
its public IP communications. Big pipes are used here because images are
served over this port, and it is also used to connect to iSCSI storage,
back-ending the image storage and database. The controller node also has
a gigabit interface that is used in trunk mode for OpenStack managed
VLAN traffic. This port handles traffic to the dhcp-agent and
metadata-proxy.

We approximate the older ``nova-network`` multi-host HA setup by using
"provider VLAN networks" that connect instances directly to existing
publicly addressable networks and use existing physical routers as their
default gateway. This means that if our network controller goes down,
running instances still have their network available, and no single
Linux host becomes a traffic bottleneck. We are able to do this because
we have a sufficient supply of IPv4 addresses to cover all of our
instances and thus don't need NAT and don't use floating IP addresses.
We provide a single generic public network to all projects and
additional existing VLANs on a project-by-project basis as needed.
Individual projects are also allowed to create their own private
GRE-based networks.

Resources
---------

- `CSAIL homepage <http://www.csail.mit.edu/>`_

DAIR
~~~~

Who uses it: DAIR is an integrated virtual environment that leverages
the CANARIE network to develop and test new information communication
technology (ICT) and other digital technologies. It combines such
digital infrastructure as advanced networking and cloud computing and
storage to create an environment for developing and testing innovative
ICT applications, protocols, and services; performing at-scale
experimentation for deployment; and facilitating a faster time to
market.

Deployment
----------

DAIR is hosted at two different data centers across Canada: one in
Alberta and the other in Quebec. It consists of a cloud controller at
each location, although one is designated the "master" controller that
is in charge of central authentication and quotas. This is done through
custom scripts and light modifications to OpenStack. DAIR is currently
running Havana.

For Object Storage, each region has a swift environment.

A NetApp appliance is used in each region for both block storage and
instance storage. There are future plans to move the instances off the
NetApp appliance and onto a distributed file system such as :term:`Ceph` or
GlusterFS.

VlanManager is used extensively for network management. All servers have
two bonded 10GbE NICs that are connected to two redundant switches. DAIR
is set up to use single-node networking where the cloud controller is
the gateway for all instances on all compute nodes. Internal OpenStack
traffic (for example, storage traffic) does not go through the cloud
controller.

Resources
---------

- `DAIR homepage <http://www.canarie.ca/cloud/>`__

CERN
~~~~

Who uses it: researchers at CERN (European Organization for Nuclear
Research) conducting high-energy physics research.

Deployment
----------

The environment is largely based on Scientific Linux 6, which is Red Hat
compatible. We use KVM as our primary hypervisor, although tests are
ongoing with Hyper-V on Windows Server 2008.

We use the Puppet Labs OpenStack modules to configure Compute, Image
service, Identity, and dashboard. Puppet is used widely for instance
configuration, and Foreman is used as a GUI for reporting and instance
provisioning.

Users and groups are managed through Active Directory and imported into
the Identity service using LDAP. CLIs are available for nova and
Euca2ools to do this.

There are three clouds currently running at CERN, totaling about 4,700
compute nodes, with approximately 120,000 cores. The CERN IT cloud aims
to expand to 300,000 cores by 2015.

Resources
---------

- `OpenStack in Production: A tale of 3 OpenStack
  Clouds <http://openstack-in-production.blogspot.de/2013/09/a-tale-of-3-openstack-clouds-50000.html>`_

- `Review of CERN Data Centre
  Infrastructure <http://cds.cern.ch/record/1457989/files/chep%202012%20CERN%20infrastructure%20final.pdf?version=1>`_

- `CERN Cloud Infrastructure User
  Guide <http://information-technology.web.cern.ch/book/cern-private-cloud-user-guide>`_
@ -1,12 +0,0 @@
Appendix
~~~~~~~~

.. toctree::
   :maxdepth: 1

   app-usecases.rst
   app-crypt.rst
   app-roadmaps.rst
   app-resources.rst
   common/app-support.rst
   common/glossary.rst
@ -1 +0,0 @@
../../common
@ -1,297 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
# import sys

import openstackdocstheme

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['openstackdocstheme']

# Add any paths that contain templates here, relative to this directory.
# templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
repository_name = "openstack/openstack-manuals"
bug_project = 'openstack-manuals'
project = u'Operations Guide'
bug_tag = u'ops-guide'
copyright = u'2016-2017, OpenStack contributors'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '15.0'
# The full version, including alpha/beta/rc tags.
release = '15.0.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['common/cli*', 'common/nova*',
                    'common/appendix.rst',
                    'common/get-started*', 'common/dashboard*']

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'openstackdocs'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = [openstackdocstheme.get_html_theme_path()]

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = []

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# So that we can enable "log-a-bug" links from each output HTML page, this
# variable must be set to a format that includes year, month, day, hours and
# minutes.
html_last_updated_fmt = '%Y-%m-%d %H:%M'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
html_use_index = False

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
html_show_sourcelink = False

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'ops-guide'

# If true, publish source files
html_copy_source = False

# -- Options for LaTeX output ---------------------------------------------
pdf_theme_path = openstackdocstheme.get_pdf_theme_path()
openstack_logo = openstackdocstheme.get_openstack_logo_path()

latex_custom_template = r"""
\newcommand{\openstacklogo}{%s}
\usepackage{%s}
""" % (openstack_logo, pdf_theme_path)

latex_engine = 'xelatex'

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    'papersize': 'a4paper',

    # The font size ('10pt', '11pt' or '12pt').
    'pointsize': '11pt',

    # Default figure align
    'figure_align': 'H',

    # Not to generate blank page after chapter
    'classoptions': ',openany',

    # Additional stuff for the LaTeX preamble.
    'preamble': latex_custom_template,
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    ('index', 'OpsGuide.tex', u'Operations Guide',
     u'OpenStack contributors', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'opsguide', u'Operations Guide',
     [u'OpenStack contributors'], 1)
]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    ('index', 'OpsGuide', u'Operations Guide',
     u'OpenStack contributors', 'OpsGuide',
     'This book provides information about designing and operating '
     'OpenStack clouds.', 'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False

# -- Options for Internationalization output ------------------------------
locale_dirs = ['locale/']
@ -1,60 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:cc="http://web.resource.org/cc/"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:svg="http://www.w3.org/2000/svg"
   xmlns="http://www.w3.org/2000/svg"
   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
   width="19.21315"
   height="18.294994"
   id="svg2"
   sodipodi:version="0.32"
   inkscape:version="0.45"
   sodipodi:modified="true"
   version="1.0">
  <defs
     id="defs4" />
  <sodipodi:namedview
     id="base"
     pagecolor="#ffffff"
     bordercolor="#666666"
     borderopacity="1.0"
     gridtolerance="10000"
     guidetolerance="10"
     objecttolerance="10"
     inkscape:pageopacity="0.0"
     inkscape:pageshadow="2"
     inkscape:zoom="7.9195959"
     inkscape:cx="17.757032"
     inkscape:cy="7.298821"
     inkscape:document-units="px"
     inkscape:current-layer="layer1"
     inkscape:window-width="984"
     inkscape:window-height="852"
     inkscape:window-x="148"
     inkscape:window-y="66" />
  <metadata
     id="metadata7">
    <rdf:RDF>
      <cc:Work
         rdf:about="">
        <dc:format>image/svg+xml</dc:format>
        <dc:type
           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
      </cc:Work>
    </rdf:RDF>
  </metadata>
  <g
     inkscape:label="Layer 1"
     inkscape:groupmode="layer"
     id="layer1"
     transform="translate(-192.905,-516.02064)">
    <path
       style="fill:#000000"
       d="M 197.67968,534.31563 C 197.40468,534.31208 196.21788,532.53719 195.04234,530.37143 L 192.905,526.43368 L 193.45901,525.87968 C 193.76371,525.57497 194.58269,525.32567 195.27896,525.32567 L 196.5449,525.32567 L 197.18129,527.33076 L 197.81768,529.33584 L 202.88215,523.79451 C 205.66761,520.74678 208.88522,517.75085 210.03239,517.13691 L 212.11815,516.02064 L 207.90871,520.80282 C 205.59351,523.43302 202.45735,527.55085 200.93947,529.95355 C 199.42159,532.35625 197.95468,534.31919 197.67968,534.31563 z "
       id="path2223" />
  </g>
</svg>
@ -1,55 +0,0 @@
==========================
OpenStack Operations Guide
==========================

Abstract
~~~~~~~~

This guide provides information about operating OpenStack clouds.

We recommend that you turn to the `Installation Tutorials and Guides
<https://docs.openstack.org/project-install-guide/ocata/>`_,
which contains a step-by-step guide on how to manually install the
OpenStack packages and dependencies on your cloud.

While it is important for an operator to be familiar with the steps
involved in deploying OpenStack, we also strongly encourage you to
evaluate `OpenStack deployment tools
<https://docs.openstack.org/developer/openstack-projects.html>`_
and configuration-management tools, such as :term:`Puppet` or
:term:`Chef`, which can help automate this deployment process.

In this guide, we assume that you have successfully deployed an
OpenStack cloud and are able to perform basic operations
such as adding images, booting instances, and attaching volumes.

As your focus turns to stable operations, we recommend that you do skim
this guide to get a sense of the content. Some of this content is useful
to read in advance so that you can put best practices into effect to
simplify your life in the long run. Other content is more useful as a
reference that you might turn to when an unexpected event occurs (such
as a power failure), or to troubleshoot a particular problem.

Contents
~~~~~~~~

.. toctree::
   :maxdepth: 2

   acknowledgements.rst
   preface.rst
   common/conventions.rst
   ops-deployment-factors.rst
   ops-planning.rst
   ops-capacity-planning-scaling.rst
   ops-lay-of-the-land.rst
   ops-projects-users.rst
   ops-user-facing-operations.rst
   ops-maintenance.rst
   ops-network-troubleshooting.rst
   ops-logging-monitoring.rst
   ops-backup-recovery.rst
   ops-customize.rst
   ops-advanced-configuration.rst
   ops-upgrades.rst
   appendix.rst
@ -1,151 +0,0 @@
======================
Advanced Configuration
======================

OpenStack is intended to work well across a variety of installation
flavors, from very small private clouds to large public clouds. To
achieve this, the developers add configuration options to their code
that allow the behavior of the various components to be tweaked
depending on your needs. Unfortunately, it is not possible to cover all
possible deployments with the default configuration values.

At the time of writing, OpenStack has more than 3,000 configuration
options. You can see them documented at the
`OpenStack Configuration Reference
<https://docs.openstack.org/ocata/config-reference/config-overview.html>`_.
This chapter cannot hope to document all of these, but we do try to
introduce the important concepts so that you know where to go digging
for more information.

Differences Between Various Drivers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Many OpenStack projects implement a driver layer, and each of these
drivers will implement its own configuration options. For example, in
OpenStack Compute (nova), there are various hypervisor drivers
implemented—libvirt, xenserver, hyper-v, and vmware, for example. Not
all of these hypervisor drivers have the same features, and each has
different tuning requirements.

.. note::

   The currently implemented hypervisors are listed on the `OpenStack
   Configuration Reference
   <https://docs.openstack.org/ocata/config-reference/compute/hypervisors.html>`__.
   You can see a matrix of the various features in OpenStack Compute
   (nova) hypervisor drivers at the `Hypervisor support matrix
   page <https://docs.openstack.org/developer/nova/support-matrix.html>`_.

The point we are trying to make here is that just because an option
exists doesn't mean that option is relevant to your driver choices.
Normally, the documentation notes which drivers the configuration
applies to.

Implementing Periodic Tasks
~~~~~~~~~~~~~~~~~~~~~~~~~~~

Another common concept across various OpenStack projects is that of
periodic tasks. Periodic tasks are much like cron jobs on traditional
Unix systems, but they are run inside an OpenStack process. For example,
when OpenStack Compute (nova) needs to work out what images it can
remove from its local cache, it runs a periodic task to do this.

Periodic tasks are important to understand because of limitations in the
threading model that OpenStack uses. OpenStack uses cooperative
threading in Python, which means that if something long and complicated
is running, it will block other tasks inside that process from running
unless it voluntarily yields execution to another cooperative thread.

A tangible example of this is the ``nova-compute`` process. In order to
manage the image cache with libvirt, ``nova-compute`` has a periodic
process that scans the contents of the image cache. Part of this scan is
calculating a checksum for each of the images and making sure that
checksum matches what ``nova-compute`` expects it to be. However, images
can be very large, and these checksums can take a long time to generate.
At one point, before it was reported as a bug and fixed,
``nova-compute`` would block on this task and stop responding to RPC
requests. This was visible to users as failure of operations such as
spawning or deleting instances.

The takeaway from this is that if you observe an OpenStack process that
appears to "stop" for a while and then continue to process normally, you
should check that periodic tasks aren't the problem. One way to do this
is to disable the periodic tasks by setting their interval to zero.
Additionally, you can configure how often these periodic tasks run—in
some cases, it might make sense to run them at a different frequency
from the default.

The frequency is defined separately for each periodic task. Therefore,
to disable every periodic task in OpenStack Compute (nova), you would
need to set a number of configuration options to zero. The current list
of configuration options you would need to set to zero is:

* ``bandwidth_poll_interval``
* ``sync_power_state_interval``
* ``heal_instance_info_cache_interval``
* ``host_state_interval``
* ``image_cache_manager_interval``
* ``reclaim_instance_interval``
* ``volume_usage_poll_interval``
* ``shelved_poll_interval``
* ``shelved_offload_time``
* ``instance_delete_interval``

To set a configuration option to zero, include a line such as
``image_cache_manager_interval=0`` in your ``nova.conf`` file.
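
For example, a ``nova.conf`` fragment that disables a few of the
periodic tasks listed above might look like the following. Treat this as
a sketch, and verify the option names, and whether zero or a negative
value disables a given task, against the configuration reference for
your release:

.. code-block:: ini

   [DEFAULT]
   # As described above, setting the interval to zero disables the task.
   # Stop verifying the image cache on compute nodes.
   image_cache_manager_interval = 0
   # Stop periodically re-syncing instance power states.
   sync_power_state_interval = 0
   # Stop periodically healing the instance network info cache.
   heal_instance_info_cache_interval = 0
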
This list will change between releases, so please refer to your
configuration guide for up-to-date information.

Specific Configuration Topics
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This section covers specific examples of configuration options you might
consider tuning. It is by no means an exhaustive list.

Security Configuration for Compute, Networking, and Storage
------------------------------------------------------------

The `OpenStack Security Guide <https://docs.openstack.org/security-guide/>`_
provides a deep dive into securing an OpenStack cloud, including
SSL/TLS, key management, PKI and certificate management, data transport
and privacy concerns, and compliance.

High Availability
-----------------

The `OpenStack High Availability
Guide <https://docs.openstack.org/ha-guide/index.html>`_ offers
suggestions for elimination of a single point of failure that could
cause system downtime. While it is not a completely prescriptive
document, it offers methods and techniques for avoiding downtime and
data loss.

Enabling IPv6 Support
---------------------

You can follow the progress being made on IPv6 support by watching the
`neutron IPv6 Subteam at
work <https://wiki.openstack.org/wiki/Meetings/Neutron-IPv6-Subteam>`_.

By modifying your configuration setup, you can set up IPv6 when using
``nova-network`` for networking, and a tested setup is documented for
FlatDHCP and a multi-host configuration. The key is to make
``nova-network`` think a ``radvd`` command ran successfully. The entire
configuration is detailed in a Cybera blog post, `“An IPv6 enabled
cloud” <http://www.cybera.ca/news-and-events/tech-radar/an-ipv6-enabled-cloud/>`_.

Geographical Considerations for Object Storage
----------------------------------------------

Support for global clustering of object storage servers is available for
all supported releases. You would implement these global clusters to
ensure replication across geographic areas in case of a natural disaster
and also to ensure that users can write or access their objects more
quickly based on the closest data center. You configure a default region
with one zone for each cluster, but be sure your network (WAN) can
handle the additional request and response load between zones as you add
more zones and build a ring that handles more zones. Refer to
`Geographically Distributed Clusters
<https://docs.openstack.org/developer/swift/admin_guide.html#geographically-distributed-clusters>`_
in the documentation for additional information.
@ -1,219 +0,0 @@
===================
Backup and Recovery
===================

Standard backup best practices apply when creating your OpenStack backup
policy. For example, how often to back up your data is closely related
to how quickly you need to recover from data loss.

.. note::

   If you cannot have any data loss at all, you should also focus on a
   highly available deployment. The `OpenStack High Availability
   Guide <https://docs.openstack.org/ha-guide/index.html>`_ offers
   suggestions for elimination of a single point of failure that could
   cause system downtime. While it is not a completely prescriptive
   document, it offers methods and techniques for avoiding downtime and
   data loss.

Other backup considerations include:

* How many backups to keep?
* Should backups be kept off-site?
* How often should backups be tested?

Just as important as a backup policy is a recovery policy (or at least
recovery testing).

What to Back Up
~~~~~~~~~~~~~~~

While OpenStack is composed of many components and moving parts, backing
up the critical data is quite simple.

This chapter describes only how to back up configuration files and
databases that the various OpenStack components need to run. This
chapter does not describe how to back up objects inside Object Storage
or data contained inside Block Storage. Generally these areas are left
for users to back up on their own.

Database Backups
~~~~~~~~~~~~~~~~

The example OpenStack architecture designates the cloud controller as
the MySQL server. This MySQL server hosts the databases for nova,
glance, cinder, and keystone. With all of these databases in one place,
it's very easy to create a database backup:

.. code-block:: console

   # mysqldump --opt --all-databases > openstack.sql

If you only want to back up a single database, you can instead run:

.. code-block:: console

   # mysqldump --opt nova > nova.sql

where ``nova`` is the database you want to back up.

You can easily automate this process by creating a cron job that runs
the following script once per day:

.. code-block:: bash

   #!/bin/bash
   backup_dir="/var/lib/backups/mysql"
   filename="${backup_dir}/mysql-`hostname`-`eval date +%Y%m%d`.sql.gz"
   # Dump the entire MySQL database
   /usr/bin/mysqldump --opt --all-databases | gzip > $filename
   # Delete backups older than 7 days
   find $backup_dir -ctime +7 -type f -delete

This script dumps the entire MySQL database and deletes any backups
older than seven days.
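
For example, if the script above were saved as
``/usr/local/bin/mysql-backup.sh`` (a hypothetical path used only for
illustration), a root crontab entry running it every night at 02:00
could look like this:

.. code-block:: console

   # crontab -e
   0 2 * * * /usr/local/bin/mysql-backup.sh
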
File System Backups
~~~~~~~~~~~~~~~~~~~

This section discusses which files and directories should be backed up
regularly, organized by service.

Compute
-------

The ``/etc/nova`` directory on both the cloud controller and compute
nodes should be regularly backed up.

``/var/log/nova`` does not need to be backed up if you have all logs
going to a central area. It is highly recommended to use a central
logging server or back up the log directory.

``/var/lib/nova`` is another important directory to back up. The
exception to this is the ``/var/lib/nova/instances`` subdirectory on
compute nodes. This subdirectory contains the KVM images of running
instances. You would want to back up this directory only if you need to
maintain backup copies of all instances. Under most circumstances, you
do not need to do this, but this can vary from cloud to cloud and your
service levels. Also be aware that making a backup of a live KVM
instance can cause that instance to not boot properly if it is ever
restored from a backup.

Image Catalog and Delivery
--------------------------

``/etc/glance`` and ``/var/log/glance`` follow the same rules as their
nova counterparts.

``/var/lib/glance`` should also be backed up. Take special notice of
``/var/lib/glance/images``. If you are using a file-based back end for
glance, ``/var/lib/glance/images`` is where the images are stored and
care should be taken.

There are two ways to ensure stability with this directory. The first is
to make sure this directory resides on a RAID array. If a disk fails, the
directory is available. The second way is to use a tool such as rsync to
replicate the images to another server:

.. code-block:: console

   # rsync -az --progress /var/lib/glance/images backup-server:/var/lib/glance/images/

Identity
--------

``/etc/keystone`` and ``/var/log/keystone`` follow the same rules as
other components.

``/var/lib/keystone``, although it should not contain any data being
used, can also be backed up just in case.

Block Storage
-------------

``/etc/cinder`` and ``/var/log/cinder`` follow the same rules as other
components.

``/var/lib/cinder`` should also be backed up.

Networking
----------

``/etc/neutron`` and ``/var/log/neutron`` follow the same rules as other
components.

``/var/lib/neutron`` should also be backed up.

Object Storage
--------------

``/etc/swift`` is very important to have backed up. This directory
contains the swift configuration files as well as the ring files and
ring :term:`builder files <builder file>`, which, if lost, render the data
on your cluster inaccessible. A best practice is to copy the builder files
to all storage nodes along with the ring files, so that multiple backup
copies are spread throughout your storage cluster.
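
One simple way to follow that practice (a sketch, assuming the default
``/etc/swift`` location and a reachable storage node named
``storage-node``) is to push the ring and builder files from the node
where they are built to each storage node:

.. code-block:: console

   # rsync -az /etc/swift/*.ring.gz /etc/swift/*.builder storage-node:/etc/swift/
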
Telemetry
---------

Back up the ``/etc/ceilometer`` directory containing Telemetry configuration
files.

Orchestration
-------------

Back up HOT template ``yaml`` files, and the ``/etc/heat/`` directory
containing Orchestration configuration files.

Recovering Backups
~~~~~~~~~~~~~~~~~~

Recovering backups is a fairly simple process. To begin, first ensure
that the service you are recovering is not running. For example, to do a
full recovery of ``nova`` on the cloud controller, first stop all
``nova`` services:

.. code-block:: console

   # stop nova-api
   # stop nova-consoleauth
   # stop nova-novncproxy
   # stop nova-objectstore
   # stop nova-scheduler

Now you can import a previously backed-up database:

.. code-block:: console

   # mysql nova < nova.sql

You can also restore backed-up nova directories:

.. code-block:: console

   # mv /etc/nova{,.orig}
   # cp -a /path/to/backup/nova /etc/

Once the files are restored, start everything back up:

.. code-block:: console

   # start mysql
   # for i in nova-api nova-consoleauth nova-novncproxy \
     nova-objectstore nova-scheduler
   > do
   > start $i
   > done

Other services follow the same process, with their respective
directories and databases.
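
For example, restoring the Image service follows the same pattern as the
nova recovery above (shown here as an illustrative sketch; adjust the
service names and backup paths to your deployment):

.. code-block:: console

   # stop glance-api
   # stop glance-registry
   # mysql glance < glance.sql
   # mv /etc/glance{,.orig}
   # cp -a /path/to/backup/glance /etc/
   # start glance-registry
   # start glance-api
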
Summary
~~~~~~~

Backup and subsequent recovery is one of the first tasks system
administrators learn. However, each system has different items that need
attention. By taking care of your database, image service, and
appropriate file system locations, you can be assured that you can
handle any event requiring recovery.
@ -1,423 +0,0 @@
.. _capacity-planning-scaling:

=============================
Capacity planning and scaling
=============================

Cloud-based applications typically request more discrete hardware (horizontal
scaling) as opposed to traditional applications, which require larger hardware
to scale (vertical scaling).

OpenStack is designed to be horizontally scalable. Rather than switching
to larger servers, you procure more servers and simply install identically
configured services. Ideally, you scale out and load balance among groups of
functionally identical services (for example, compute nodes or ``nova-api``
nodes) that communicate on a message bus.

Determining cloud scalability
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Determining the scalability of your cloud and how to improve it requires
balancing many variables. No one solution meets everyone's scalability goals.
However, it is helpful to track a number of metrics. You can define
virtual hardware templates called "flavors" in OpenStack, which will impact
your cloud scaling decisions. These templates define sizes for memory in RAM,
root disk size, amount of ephemeral data disk space available, and the number
of CPU cores.

The default OpenStack flavors are shown in :ref:`table_default_flavors`.

.. _table_default_flavors:

.. list-table:: Table. OpenStack default flavors
   :widths: 20 20 20 20 20
   :header-rows: 1

   * - Name
     - Virtual cores
     - Memory
     - Disk
     - Ephemeral
   * - m1.tiny
     - 1
     - 512 MB
     - 1 GB
     - 0 GB
   * - m1.small
     - 1
     - 2 GB
     - 10 GB
     - 20 GB
   * - m1.medium
     - 2
     - 4 GB
     - 10 GB
     - 40 GB
   * - m1.large
     - 4
     - 8 GB
     - 10 GB
     - 80 GB
   * - m1.xlarge
     - 8
     - 16 GB
     - 10 GB
     - 160 GB

The starting point is the core count of your cloud. By applying
some ratios, you can gather information about:

- The number of virtual machines (VMs) you expect to run,
  ``((overcommit fraction × cores) / virtual cores per instance)``

- How much storage is required ``(flavor disk size × number of instances)``

You can use these ratios to determine how much additional infrastructure
you need to support your cloud.

Here is an example using the ratios for gathering scalability
information for the number of VMs expected as well as the storage
needed (a short script below re-computes these numbers). The following
numbers support (200 / 2) × 16 = 1600 VM instances
and require 80 TB of storage for ``/var/lib/nova/instances``:

- 200 physical cores.

- Most instances are size m1.medium (two virtual cores, 50 GB of
  storage).

- Default CPU overcommit ratio (``cpu_allocation_ratio`` in the ``nova.conf``
  file) of 16:1.

.. note::
   Regardless of the overcommit ratio, an instance cannot be placed
   on any physical node with fewer raw (pre-overcommit) resources than
   the instance flavor requires.
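
The arithmetic is simple enough to script. The following minimal Python
sketch (not part of the original guide) just re-computes the example
numbers above:

.. code-block:: python

   # Example capacity estimate: 200 physical cores, m1.medium instances
   # (2 virtual cores and 50 GB of storage each), 16:1 CPU overcommit.
   physical_cores = 200
   cpu_allocation_ratio = 16
   vcpus_per_instance = 2
   disk_per_instance_gb = 50

   max_instances = (physical_cores * cpu_allocation_ratio) // vcpus_per_instance
   storage_tb = max_instances * disk_per_instance_gb / 1000.0

   print(max_instances)  # 1600
   print(storage_tb)     # 80.0
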
However, you need more than the core count alone to estimate the load
|
|
||||||
that the API services, database servers, and queue servers are likely to
|
|
||||||
encounter. You must also consider the usage patterns of your cloud.
|
|
||||||
|
|
||||||
As a specific example, compare a cloud that supports a managed
|
|
||||||
web-hosting platform with one running integration tests for a
|
|
||||||
development project that creates one VM per code commit. In the former,
|
|
||||||
the heavy work of creating a VM happens only every few months, whereas
|
|
||||||
the latter puts constant heavy load on the cloud controller. You must
|
|
||||||
consider your average VM lifetime, as a larger number generally means
|
|
||||||
less load on the cloud controller.
|
|
||||||
|
|
||||||
Aside from the creation and termination of VMs, you must consider the
|
|
||||||
impact of users accessing the service particularly on ``nova-api`` and
|
|
||||||
its associated database. Listing instances garners a great deal of
|
|
||||||
information and, given the frequency with which users run this
|
|
||||||
operation, a cloud with a large number of users can increase the load
|
|
||||||
significantly. This can occur even without their knowledge. For example,
|
|
||||||
leaving the OpenStack dashboard instances tab open in the browser
|
|
||||||
refreshes the list of VMs every 30 seconds.
|
|
||||||
|
|
||||||
After you consider these factors, you can determine how many cloud
|
|
||||||
controller cores you require. A typical eight core, 8 GB of RAM server
|
|
||||||
is sufficient for up to a rack of compute nodes — given the above
|
|
||||||
caveats.
|
|
||||||
|
|
||||||
You must also consider key hardware specifications for the performance
|
|
||||||
of user VMs, as well as budget and performance needs, including storage
|
|
||||||
performance (spindles/core), memory availability (RAM/core), network
|
|
||||||
bandwidth hardware specifications and (Gbps/core), and overall
|
|
||||||
CPU performance (CPU/core).
|
|
||||||
|
|
||||||
.. tip::
|
|
||||||
|
|
||||||
For a discussion of metric tracking, including how to extract
|
|
||||||
metrics from your cloud, see the `OpenStack Operations Guide
|
|
||||||
<https://docs.openstack.org/ops-guide/ops-logging-monitoring.html>`_.
|
|
||||||
|
|
||||||
Adding cloud controller nodes
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
You can facilitate the horizontal expansion of your cloud by adding
|
|
||||||
nodes. Adding compute nodes is straightforward since they are easily picked up
|
|
||||||
by the existing installation. However, you must consider some important
|
|
||||||
points when you design your cluster to be highly available.
|
|
||||||
|
|
||||||
A cloud controller node runs several different services. You
|
|
||||||
can install services that communicate only using the message queue
|
|
||||||
internally— ``nova-scheduler`` and ``nova-console`` on a new server for
|
|
||||||
expansion. However, other integral parts require more care.
|
|
||||||
|
|
||||||
You should load balance user-facing services such as the dashboard,
``nova-api``, or the Object Storage proxy. Use any standard HTTP
load-balancing method (DNS round robin, hardware load balancer, or
software such as Pound or HAProxy). One caveat with the dashboard is the
VNC proxy, which uses the WebSocket protocol, something that an L7 load
balancer might struggle with. See also `Horizon session storage
<https://docs.openstack.org/developer/horizon/topics/deployment.html#session-storage>`_.
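
As an illustration only, the following is a minimal HAProxy sketch that
round-robins the Compute API across two controllers; the hostnames,
addresses, and port are placeholders for your own deployment:

.. code-block:: none

   # Example HAProxy fragment for load balancing nova-api (port 8774).
   frontend nova-api-frontend
       bind *:8774
       default_backend nova-api-backend

   backend nova-api-backend
       balance roundrobin
       server controller1 192.0.2.11:8774 check
       server controller2 192.0.2.12:8774 check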
|
|
||||||
|
|
||||||
You can configure some services, such as ``nova-api`` and
``glance-api``, to use multiple processes by changing a flag in their
configuration file, allowing them to share work between multiple cores
on a single machine.
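
For example, the following configuration fragments (option names should
be checked against your release; the worker counts are illustrative)
start several API worker processes:

.. code-block:: ini

   # /etc/nova/nova.conf -- run four Compute API workers
   [DEFAULT]
   osapi_compute_workers = 4

.. code-block:: ini

   # /etc/glance/glance-api.conf -- run four Image API workers
   [DEFAULT]
   workers = 4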
|
|
||||||
|
|
||||||
.. tip::
|
|
||||||
|
|
||||||
Several options are available for MySQL load balancing, and the
|
|
||||||
supported AMQP brokers have built-in clustering support. Information
|
|
||||||
on how to configure these and many of the other services can be
|
|
||||||
found in the `Operations Guide
|
|
||||||
<https://docs.openstack.org/ops-guide/operations.html>`_.
|
|
||||||
|
|
||||||
Segregating your cloud
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Segregating your cloud is needed when users require different regions
for legal considerations around data storage, redundancy across
earthquake fault lines, or low-latency API calls. The cloud can be
segregated by *cells*, *regions*, *availability zones*, or *host
aggregates*.
|
|
||||||
|
|
||||||
Each method provides different functionality and can be best divided
|
|
||||||
into two groups:
|
|
||||||
|
|
||||||
- Cells and regions, which segregate an entire cloud and result in
|
|
||||||
running separate Compute deployments.
|
|
||||||
|
|
||||||
- :term:`Availability zones <availability zone>` and host aggregates,
|
|
||||||
which merely divide a single Compute deployment.
|
|
||||||
|
|
||||||
:ref:`table_segregation_methods` provides a comparison view of each
|
|
||||||
segregation method currently provided by OpenStack Compute.
|
|
||||||
|
|
||||||
.. _table_segregation_methods:
|
|
||||||
|
|
||||||
.. list-table:: Table. OpenStack segregation methods
|
|
||||||
:widths: 20 20 20 20 20
|
|
||||||
:header-rows: 1
|
|
||||||
|
|
||||||
* -
|
|
||||||
- Cells
|
|
||||||
- Regions
|
|
||||||
- Availability zones
|
|
||||||
- Host aggregates
|
|
||||||
* - **Use**
|
|
||||||
- A single :term:`API endpoint` for compute, or you require a second
|
|
||||||
level of scheduling.
|
|
||||||
- Discrete regions with separate API endpoints and no coordination
|
|
||||||
between regions.
|
|
||||||
- Logical separation within your nova deployment for physical isolation
|
|
||||||
or redundancy.
|
|
||||||
- To schedule a group of hosts with common features.
|
|
||||||
* - **Example**
|
|
||||||
- A cloud with multiple sites where you can schedule VMs "anywhere" or on
|
|
||||||
a particular site.
|
|
||||||
- A cloud with multiple sites, where you schedule VMs to a particular
|
|
||||||
site and you want a shared infrastructure.
|
|
||||||
- A single-site cloud with equipment fed by separate power supplies.
|
|
||||||
- Scheduling to hosts with trusted hardware support.
|
|
||||||
* - **Overhead**
|
|
||||||
- Considered experimental. A new service, nova-cells. Each cell has a full
|
|
||||||
nova installation except nova-api.
|
|
||||||
- A different API endpoint for every region. Each region has a full nova
|
|
||||||
installation.
|
|
||||||
- Configuration changes to ``nova.conf``.
|
|
||||||
- Configuration changes to ``nova.conf``.
|
|
||||||
* - **Shared services**
|
|
||||||
- Keystone, ``nova-api``
|
|
||||||
- Keystone
|
|
||||||
- Keystone, All nova services
|
|
||||||
- Keystone, All nova services
|
|
||||||
|
|
||||||
Cells and regions
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
OpenStack Compute cells are designed to allow running the cloud in a
|
|
||||||
distributed fashion without having to use more complicated technologies,
|
|
||||||
or be invasive to existing nova installations. Hosts in a cloud are
|
|
||||||
partitioned into groups called *cells*. Cells are configured in a tree.
|
|
||||||
The top-level cell ("API cell") has a host that runs the ``nova-api``
|
|
||||||
service, but no ``nova-compute`` services. Each child cell runs all of
|
|
||||||
the other typical ``nova-*`` services found in a regular installation,
|
|
||||||
except for the ``nova-api`` service. Each cell has its own message queue
|
|
||||||
and database service and also runs ``nova-cells``, which manages the
|
|
||||||
communication between the API cell and child cells.
|
|
||||||
|
|
||||||
This allows a single API server to be used to control access to
multiple cloud installations. Introducing a second level of scheduling
(the cell selection), in addition to the regular ``nova-scheduler``
selection of hosts, provides greater flexibility to control where
virtual machines are run.
|
|
||||||
|
|
||||||
Unlike having a single API endpoint, regions have a separate API
endpoint per installation, allowing for a more discrete separation.
Users wanting to run instances across sites have to explicitly select a
region. However, the additional complexity of running a new service is
not required.
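
For illustration, each region registers its services in the shared
Identity service under its own region name; the region names and URLs
below are examples only:

.. code-block:: console

   $ openstack endpoint create --region RegionTwo \
     compute public http://region2.example.com:8774/v2.1
   $ openstack endpoint create --region RegionTwo \
     image public http://region2.example.com:9292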
|
|
||||||
|
|
||||||
The OpenStack dashboard (horizon) can be configured to use multiple
regions through the ``AVAILABLE_REGIONS`` setting in its
``local_settings.py`` file.
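
A minimal sketch of that setting, assuming two example regions, is:

.. code-block:: python

   # Each entry is (Identity service auth URL, display name); the URLs
   # shown here are placeholders.
   AVAILABLE_REGIONS = [
       ('http://region1.example.com:5000/v3', 'RegionOne'),
       ('http://region2.example.com:5000/v3', 'RegionTwo'),
   ]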
|
|
||||||
|
|
||||||
Availability zones and host aggregates
|
|
||||||
--------------------------------------
|
|
||||||
|
|
||||||
You can use availability zones, host aggregates, or both to partition a
|
|
||||||
nova deployment. Both methods are configured and implemented in a similar
|
|
||||||
way.
|
|
||||||
|
|
||||||
Availability zone
|
|
||||||
^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
This enables you to arrange OpenStack compute hosts into logical groups
|
|
||||||
and provides a form of physical isolation and redundancy from other
|
|
||||||
availability zones, such as by using a separate power supply or network
|
|
||||||
equipment.
|
|
||||||
|
|
||||||
You define the availability zone in which a specified compute host
|
|
||||||
resides locally on each server. An availability zone is commonly used to
|
|
||||||
identify a set of servers that have a common attribute. For instance, if
|
|
||||||
some of the racks in your data center are on a separate power source,
|
|
||||||
you can put servers in those racks in their own availability zone.
|
|
||||||
Availability zones can also help separate different classes of hardware.
|
|
||||||
|
|
||||||
When users provision resources, they can specify from which availability
|
|
||||||
zone they want their instance to be built. This allows cloud consumers
|
|
||||||
to ensure that their application resources are spread across disparate
|
|
||||||
machines to achieve high availability in the event of hardware failure.
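
For example (the host, aggregate, zone, image, and flavor names are
illustrative), an operator can place hosts into an availability zone
through an aggregate, and a user can then target that zone at boot time:

.. code-block:: console

   # Operator: group the hosts of one rack into their own zone
   $ openstack aggregate create --zone rack1-az rack1
   $ openstack aggregate add host rack1 compute-01

   # User: request that the instance be built in that zone
   $ openstack server create --availability-zone rack1-az \
     --image cirros --flavor m1.small az-test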
|
|
||||||
|
|
||||||
Host aggregates
^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
This enables you to partition OpenStack Compute deployments into logical
|
|
||||||
groups for load balancing and instance distribution. You can use host
|
|
||||||
aggregates to further partition an availability zone. For example, you
|
|
||||||
might use host aggregates to partition an availability zone into groups
|
|
||||||
of hosts that either share common resources, such as storage and
|
|
||||||
network, or have a special property, such as trusted computing
|
|
||||||
hardware.
|
|
||||||
|
|
||||||
A common use of host aggregates is to provide information for use with
|
|
||||||
the ``nova-scheduler``. For example, you might use a host aggregate to
|
|
||||||
group a set of hosts that share specific flavors or images.
|
|
||||||
|
|
||||||
The general case for this is setting key-value pairs in the aggregate
metadata and matching key-value pairs in the flavor's ``extra_specs``
metadata. The ``AggregateInstanceExtraSpecsFilter`` in the filter
scheduler will enforce that instances are scheduled only on hosts in
aggregates that define the same key with the same value.
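
A sketch of this pattern with the ``openstack`` client follows; the
aggregate, host, metadata key, and flavor names are examples only:

.. code-block:: console

   # Tag an aggregate and its hosts with a key the scheduler can match on
   $ openstack aggregate create ssd-hosts
   $ openstack aggregate add host ssd-hosts compute-05
   $ openstack aggregate set --property ssd=true ssd-hosts

   # Require the same key-value pair in the flavor's extra_specs
   $ openstack flavor set \
     --property aggregate_instance_extra_specs:ssd=true m1.ssd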
|
|
||||||
|
|
||||||
An advanced use of this general concept allows different flavor types to
|
|
||||||
run with different CPU and RAM allocation ratios so that high-intensity
|
|
||||||
computing loads and low-intensity development and testing systems can
|
|
||||||
share the same cloud without either starving the high-use systems or
|
|
||||||
wasting resources on low-utilization systems. This works by setting
|
|
||||||
``metadata`` in your host aggregates and matching ``extra_specs`` in
|
|
||||||
your flavor types.
|
|
||||||
|
|
||||||
The first step is setting the aggregate metadata keys
``cpu_allocation_ratio`` and ``ram_allocation_ratio`` to a
floating-point value. The ``AggregateCoreFilter`` and
``AggregateRamFilter`` scheduler filters will use those values rather
than the global defaults in ``nova.conf`` when scheduling to hosts in
the aggregate. Be cautious when using this feature, since each host can
be in multiple aggregates but should have only one allocation ratio for
each resource. It is up to you to avoid putting a host in multiple
aggregates that define different values for the same resource.
|
|
||||||
|
|
||||||
This is the first half of the equation. To get flavor types that are
guaranteed a particular ratio, you must set the ``extra_specs`` in the
flavor type to the key-value pair you want to match in the aggregate.
For example, if you set the ``extra_specs`` key ``cpu_allocation_ratio``
to "1.0", then instances of that type will run only in aggregates where
the metadata key ``cpu_allocation_ratio`` is also defined as "1.0". In
practice, it is better to define an additional key-value pair in the
aggregate metadata to match on rather than match directly on
``cpu_allocation_ratio`` or ``core_allocation_ratio``. This allows
better abstraction. For example, by defining a key ``overcommit`` and
setting a value of "high," "medium," or "low," you could then tune the
numeric allocation ratios in the aggregates without also needing to
change all flavor types relating to them.
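
Continuing the example, a hypothetical ``overcommit`` key could be wired
up as follows; the allocation ratios, aggregate name, and flavor name
are illustrative:

.. code-block:: console

   # Aggregate for low-overcommit, high-performance workloads
   $ openstack aggregate create performance
   $ openstack aggregate set --property cpu_allocation_ratio=1.0 \
     --property ram_allocation_ratio=1.0 --property overcommit=low \
     performance

   # Flavors reference only the abstract key, not the numeric ratios
   $ openstack flavor set \
     --property aggregate_instance_extra_specs:overcommit=low m1.dedicated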
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Previously, all services had an availability zone. Currently, only
|
|
||||||
the ``nova-compute`` service has its own availability zone. Services
|
|
||||||
such as ``nova-scheduler``, ``nova-network``, and ``nova-conductor``
|
|
||||||
have always spanned all availability zones.
|
|
||||||
|
|
||||||
When you run any of the following operations, the services appear in
|
|
||||||
their own internal availability zone
|
|
||||||
(CONF.internal_service_availability_zone):
|
|
||||||
|
|
||||||
- :command:`openstack host list` (os-hosts)
|
|
||||||
|
|
||||||
- :command:`euca-describe-availability-zones verbose`
|
|
||||||
|
|
||||||
- :command:`openstack compute service list`
|
|
||||||
|
|
||||||
   The internal availability zone is hidden in
   euca-describe-availability-zones (nonverbose).
|
|
||||||
|
|
||||||
CONF.node_availability_zone has been renamed to
|
|
||||||
CONF.default_availability_zone and is used only by the
|
|
||||||
``nova-api`` and ``nova-scheduler`` services.
|
|
||||||
|
|
||||||
CONF.node_availability_zone still works but is deprecated.
|
|
||||||
|
|
||||||
Scalable Hardware
|
|
||||||
~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
While several resources already exist to help with deploying and
|
|
||||||
installing OpenStack, it's very important to make sure that you have
|
|
||||||
your deployment planned out ahead of time. This guide presumes that you
|
|
||||||
have set aside a rack for the OpenStack cloud but also offers
|
|
||||||
suggestions for when and what to scale.
|
|
||||||
|
|
||||||
Hardware Procurement
|
|
||||||
--------------------
|
|
||||||
|
|
||||||
“The Cloud” has been described as a volatile environment where servers
|
|
||||||
can be created and terminated at will. While this may be true, it does
|
|
||||||
not mean that your servers must be volatile. Ensuring that your cloud's
|
|
||||||
hardware is stable and configured correctly means that your cloud
|
|
||||||
environment remains up and running.
|
|
||||||
|
|
||||||
OpenStack can be deployed on any hardware supported by an
|
|
||||||
OpenStack compatible Linux distribution.
|
|
||||||
|
|
||||||
Hardware does not have to be consistent, but it should at least have the
|
|
||||||
same type of CPU to support instance migration.
|
|
||||||
|
|
||||||
The typical hardware recommended for use with OpenStack is the standard
value-for-money offerings that most hardware vendors stock. It should be
straightforward to divide your procurement into building blocks such as
"compute," "object storage," and "cloud controller," and request as many
of these as you need. Alternatively, any existing servers you have that
meet your performance requirements and support virtualization technology
are likely to support OpenStack.
|
|
||||||
|
|
||||||
Capacity Planning
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
OpenStack is designed to increase in size in a straightforward manner.
Taking into account the considerations previously mentioned, particularly
on the sizing of the cloud controller, it should be possible to procure
additional compute or object storage nodes as needed. New nodes do not
need to be the same specification or vendor as existing nodes.
|
|
||||||
|
|
||||||
For compute nodes, ``nova-scheduler`` will manage differences in
|
|
||||||
sizing with core count and RAM. However, you should consider that the user
|
|
||||||
experience changes with differing CPU speeds. When adding object storage
|
|
||||||
nodes, a :term:`weight` should be specified that reflects the
|
|
||||||
:term:`capability` of the node.
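
For example, an Object Storage device with twice the capacity can be
added to the ring with twice the weight; the builder file, addresses,
ports, and device names below are placeholders:

.. code-block:: console

   # A smaller device added at weight 100...
   $ swift-ring-builder object.builder add r1z1-192.0.2.21:6200/sdb 100

   # ...and a device with twice the capacity added at weight 200
   $ swift-ring-builder object.builder add r1z2-192.0.2.22:6200/sdb 200

   $ swift-ring-builder object.builder rebalance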
|
|
||||||
|
|
||||||
Monitoring the resource usage and user growth will enable you to know
when to procure. The `Logging and Monitoring
<https://docs.openstack.org/ops-guide/ops-logging-monitoring.html>`_
chapter in the Operations Guide details some useful metrics.
|
|
||||||
|
|
||||||
Burn-in Testing
|
|
||||||
---------------
|
|
||||||
|
|
||||||
The chances of failure for a server's hardware are high at the start
and at the end of its life. As a result, much of the effort of dealing
with hardware failures while in production can be avoided by appropriate
burn-in testing that attempts to trigger the early-stage failures. The
general principle is to stress the hardware to its limits. Examples of
burn-in tests include running a CPU or disk benchmark for several days.
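
As one possible approach, common open source tools such as ``stress-ng``
and ``fio`` can generate this kind of load; the durations and the target
device are examples, and the ``fio`` run below is destructive, so point
it only at a disk that holds no data you care about:

.. code-block:: console

   # Load every CPU for 48 hours
   $ stress-ng --cpu 0 --timeout 48h

   # Random read/write burn-in of an empty disk for 24 hours
   $ fio --name=burnin --filename=/dev/sdb --direct=1 --rw=randrw \
     --bs=4k --time_based --runtime=86400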
|
|
@ -1,309 +0,0 @@
|
|||||||
==================================================
|
|
||||||
Customizing the OpenStack Compute (nova) Scheduler
|
|
||||||
==================================================
|
|
||||||
|
|
||||||
Many OpenStack projects allow for customization of specific features
using a driver architecture. You can write a driver that conforms to a
particular interface and plug it in through configuration. For example,
you can easily plug in a new scheduler for Compute. The existing
schedulers for Compute are fully featured and well documented at
`Scheduling
<https://docs.openstack.org/ocata/config-reference/compute/schedulers.html>`_.
However, depending on your users' use cases, the existing schedulers
might not meet your requirements. You might need to create a new
scheduler.
|
|
||||||
|
|
||||||
To create a scheduler, you must inherit from the class
|
|
||||||
``nova.scheduler.driver.Scheduler``. Of the five methods that you can
|
|
||||||
override, you *must* override the two methods marked with an asterisk
|
|
||||||
(\*) below:
|
|
||||||
|
|
||||||
- ``update_service_capabilities``
|
|
||||||
|
|
||||||
- ``hosts_up``
|
|
||||||
|
|
||||||
- ``group_hosts``
|
|
||||||
|
|
||||||
- \* ``schedule_run_instance``
|
|
||||||
|
|
||||||
- \* ``select_destinations``
|
|
||||||
|
|
||||||
To demonstrate customizing OpenStack, we'll create an example of a
|
|
||||||
Compute scheduler that randomly places an instance on a subset of hosts,
|
|
||||||
depending on the originating IP address of the request and the prefix of
|
|
||||||
the hostname. Such an example could be useful when you have a group of
|
|
||||||
users on a subnet and you want all of their instances to start within
|
|
||||||
some subset of your hosts.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
This example is for illustrative purposes only. It should not be
|
|
||||||
used as a scheduler for Compute without further development and
|
|
||||||
testing.
|
|
||||||
|
|
||||||
When you join the screen session that ``stack.sh`` starts with
|
|
||||||
``screen -r stack``, you are greeted with many screen windows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
0$ shell* 1$ key 2$ horizon ... 9$ n-api ... 14$ n-sch ...
|
|
||||||
|
|
||||||
|
|
||||||
``shell``
|
|
||||||
A shell where you can get some work done
|
|
||||||
|
|
||||||
``key``
|
|
||||||
The keystone service
|
|
||||||
|
|
||||||
``horizon``
|
|
||||||
The horizon dashboard web application
|
|
||||||
|
|
||||||
``n-{name}``
|
|
||||||
The nova services
|
|
||||||
|
|
||||||
``n-sch``
|
|
||||||
The nova scheduler service
|
|
||||||
|
|
||||||
**To create the scheduler and plug it in through configuration**
|
|
||||||
|
|
||||||
#. The code for OpenStack lives in ``/opt/stack``, so go to the ``nova``
|
|
||||||
directory and edit your scheduler module. Change to the directory where
|
|
||||||
``nova`` is installed:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ cd /opt/stack/nova
|
|
||||||
|
|
||||||
#. Create the ``ip_scheduler.py`` Python source code file:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ vim nova/scheduler/ip_scheduler.py
|
|
||||||
|
|
||||||
#. The code shown below is a driver that will
|
|
||||||
schedule servers to hosts based on IP address as explained at the
|
|
||||||
beginning of the section. Copy the code into ``ip_scheduler.py``. When
|
|
||||||
you are done, save and close the file.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
||||||
# Copyright (c) 2014 OpenStack Foundation
|
|
||||||
# All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
||||||
# not use this file except in compliance with the License. You may obtain
|
|
||||||
# a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
||||||
# License for the specific language governing permissions and limitations
|
|
||||||
# under the License.
|
|
||||||
|
|
||||||
"""
|
|
||||||
IP Scheduler implementation
|
|
||||||
"""
|
|
||||||
|
|
||||||
import random
|
|
||||||
|
|
||||||
from oslo_config import cfg
|
|
||||||
|
|
||||||
from nova.compute import rpcapi as compute_rpcapi
|
|
||||||
from nova import exception
|
|
||||||
from nova.openstack.common import log as logging
|
|
||||||
from nova.openstack.common.gettextutils import _
|
|
||||||
from nova.scheduler import driver
|
|
||||||
|
|
||||||
CONF = cfg.CONF
|
|
||||||
CONF.import_opt('compute_topic', 'nova.compute.rpcapi')
|
|
||||||
LOG = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class IPScheduler(driver.Scheduler):
|
|
||||||
"""
|
|
||||||
Implements Scheduler as a random node selector based on
|
|
||||||
IP address and hostname prefix.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(IPScheduler, self).__init__(*args, **kwargs)
|
|
||||||
self.compute_rpcapi = compute_rpcapi.ComputeAPI()
|
|
||||||
|
|
||||||
def _filter_hosts(self, request_spec, hosts, filter_properties,
|
|
||||||
hostname_prefix):
|
|
||||||
"""Filter a list of hosts based on hostname prefix."""
|
|
||||||
|
|
||||||
hosts = [host for host in hosts if host.startswith(hostname_prefix)]
|
|
||||||
return hosts
|
|
||||||
|
|
||||||
def _schedule(self, context, topic, request_spec, filter_properties):
|
|
||||||
"""Picks a host that is up at random."""
|
|
||||||
|
|
||||||
elevated = context.elevated()
|
|
||||||
hosts = self.hosts_up(elevated, topic)
|
|
||||||
if not hosts:
|
|
||||||
msg = _("Is the appropriate service running?")
|
|
||||||
raise exception.NoValidHost(reason=msg)
|
|
||||||
|
|
||||||
remote_ip = context.remote_address
|
|
||||||
|
|
||||||
if remote_ip.startswith('10.1'):
|
|
||||||
hostname_prefix = 'doc'
|
|
||||||
elif remote_ip.startswith('10.2'):
|
|
||||||
hostname_prefix = 'ops'
|
|
||||||
else:
|
|
||||||
hostname_prefix = 'dev'
|
|
||||||
|
|
||||||
hosts = self._filter_hosts(request_spec, hosts, filter_properties,
|
|
||||||
hostname_prefix)
|
|
||||||
if not hosts:
|
|
||||||
msg = _("Could not find another compute")
|
|
||||||
raise exception.NoValidHost(reason=msg)
|
|
||||||
|
|
||||||
host = random.choice(hosts)
|
|
||||||
LOG.debug("Request from %(remote_ip)s scheduled to %(host)s" % locals())
|
|
||||||
|
|
||||||
return host
|
|
||||||
|
|
||||||
def select_destinations(self, context, request_spec, filter_properties):
|
|
||||||
"""Selects random destinations."""
|
|
||||||
num_instances = request_spec['num_instances']
|
|
||||||
# NOTE(timello): Returns a list of dicts with 'host', 'nodename' and
|
|
||||||
# 'limits' as keys for compatibility with filter_scheduler.
|
|
||||||
dests = []
|
|
||||||
for i in range(num_instances):
|
|
||||||
host = self._schedule(context, CONF.compute_topic,
|
|
||||||
request_spec, filter_properties)
|
|
||||||
host_state = dict(host=host, nodename=None, limits=None)
|
|
||||||
dests.append(host_state)
|
|
||||||
|
|
||||||
if len(dests) < num_instances:
|
|
||||||
raise exception.NoValidHost(reason='')
|
|
||||||
return dests
|
|
||||||
|
|
||||||
def schedule_run_instance(self, context, request_spec,
|
|
||||||
admin_password, injected_files,
|
|
||||||
requested_networks, is_first_time,
|
|
||||||
filter_properties, legacy_bdm_in_spec):
|
|
||||||
"""Create and run an instance or instances."""
|
|
||||||
instance_uuids = request_spec.get('instance_uuids')
|
|
||||||
for num, instance_uuid in enumerate(instance_uuids):
|
|
||||||
request_spec['instance_properties']['launch_index'] = num
|
|
||||||
try:
|
|
||||||
host = self._schedule(context, CONF.compute_topic,
|
|
||||||
request_spec, filter_properties)
|
|
||||||
updated_instance = driver.instance_update_db(context,
|
|
||||||
instance_uuid)
|
|
||||||
self.compute_rpcapi.run_instance(context,
|
|
||||||
instance=updated_instance, host=host,
|
|
||||||
requested_networks=requested_networks,
|
|
||||||
injected_files=injected_files,
|
|
||||||
admin_password=admin_password,
|
|
||||||
is_first_time=is_first_time,
|
|
||||||
request_spec=request_spec,
|
|
||||||
filter_properties=filter_properties,
|
|
||||||
legacy_bdm_in_spec=legacy_bdm_in_spec)
|
|
||||||
except Exception as ex:
|
|
||||||
# NOTE(vish): we don't reraise the exception here to make sure
|
|
||||||
# that all instances in the request get set to
|
|
||||||
# error properly
|
|
||||||
driver.handle_schedule_error(context, ex, instance_uuid,
|
|
||||||
request_spec)
|
|
||||||
|
|
||||||
There is a lot of useful information in ``context``, ``request_spec``,
|
|
||||||
and ``filter_properties`` that you can use to decide where to schedule
|
|
||||||
the instance. To find out more about what properties are available, you
|
|
||||||
can insert the following log statements into the
|
|
||||||
``schedule_run_instance`` method of the scheduler above:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
LOG.debug("context = %(context)s" % {'context': context.__dict__})
|
|
||||||
LOG.debug("request_spec = %(request_spec)s" % locals())
|
|
||||||
LOG.debug("filter_properties = %(filter_properties)s" % locals())
|
|
||||||
|
|
||||||
#. To plug this scheduler into nova, edit one configuration file,
|
|
||||||
``/etc/nova/nova.conf``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ vim /etc/nova/nova.conf
|
|
||||||
|
|
||||||
#. Find the ``scheduler_driver`` config and change it like so:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
scheduler_driver=nova.scheduler.ip_scheduler.IPScheduler
|
|
||||||
|
|
||||||
#. Restart the nova scheduler service to make nova use your scheduler.
|
|
||||||
Start by switching to the ``n-sch`` screen:
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **9**.
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **N** until you reach the ``n-sch`` screen.
|
|
||||||
|
|
||||||
#. Press **Ctrl+C** to kill the service.
|
|
||||||
|
|
||||||
#. Press **Up Arrow** to bring up the last command.
|
|
||||||
|
|
||||||
#. Press **Enter** to run it.
|
|
||||||
|
|
||||||
#. Test your scheduler with the nova CLI. Start by switching to the
|
|
||||||
``shell`` screen and finish by switching back to the ``n-sch`` screen to
|
|
||||||
check the log output:
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **0**.
|
|
||||||
|
|
||||||
#. Make sure you are in the ``devstack`` directory:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ cd /root/devstack
|
|
||||||
|
|
||||||
#. Source ``openrc`` to set up your environment variables for the CLI:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ . openrc
|
|
||||||
|
|
||||||
#. Put the image ID for the only installed image into an environment
|
|
||||||
variable:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ IMAGE_ID=`openstack image list | egrep cirros | egrep -v "kernel|ramdisk" | awk '{print $2}'`
|
|
||||||
|
|
||||||
#. Boot a test server:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack server create --flavor 1 --image $IMAGE_ID scheduler-test
|
|
||||||
|
|
||||||
#. Switch back to the ``n-sch`` screen. Among the log statements, you'll
|
|
||||||
see the line:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
2014-01-23 19:57:47.262 DEBUG nova.scheduler.ip_scheduler
|
|
||||||
[req-... demo demo] Request from xx.xx.xx.xx scheduled to devstack-havana
|
|
||||||
_schedule /opt/stack/nova/nova/scheduler/ip_scheduler.py:76
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Functional testing like this is not a replacement for proper unit
|
|
||||||
and integration testing, but it serves to get you started.
|
|
||||||
|
|
||||||
A similar pattern can be followed in other projects that use the driver
|
|
||||||
architecture. Simply create a module and class that conform to the
|
|
||||||
driver interface and plug it in through configuration. Your code runs
|
|
||||||
when that feature is used and can call out to other services as
|
|
||||||
necessary. No project core code is touched. Look for a "driver" value in
|
|
||||||
the project's ``.conf`` configuration files in ``/etc/<project>`` to
|
|
||||||
identify projects that use a driver architecture.
|
|
||||||
|
|
||||||
When your scheduler is done, we encourage you to open source it and let
|
|
||||||
the community know on the OpenStack mailing list. Perhaps others need
|
|
||||||
the same functionality. They can use your code, provide feedback, and
|
|
||||||
possibly contribute. If enough support exists for it, perhaps you can
|
|
||||||
propose that it be added to the official Compute
|
|
||||||
`schedulers <https://git.openstack.org/cgit/openstack/nova/tree/nova/scheduler>`_.
|
|
@ -1,9 +0,0 @@
|
|||||||
==========
|
|
||||||
Conclusion
|
|
||||||
==========
|
|
||||||
|
|
||||||
When operating an OpenStack cloud, you may discover that your users can
|
|
||||||
be quite demanding. If OpenStack doesn't do what your users need, it may
|
|
||||||
be up to you to fulfill those requirements. This chapter provided you
|
|
||||||
with some options for customization and gave you the tools you need to
|
|
||||||
get started.
|
|
@ -1,8 +0,0 @@
|
|||||||
===================================
|
|
||||||
Customizing the Dashboard (Horizon)
|
|
||||||
===================================
|
|
||||||
|
|
||||||
The dashboard is based on the Python
|
|
||||||
`Django <https://www.djangoproject.com/>`_ web application framework.
|
|
||||||
To know how to build your Dashboard, see `Building a Dashboard using Horizon
|
|
||||||
<https://docs.openstack.org/developer/horizon/tutorials/dashboard.html>`_.
|
|
@ -1,11 +0,0 @@
|
|||||||
===========================================
|
|
||||||
Create an OpenStack Development Environment
|
|
||||||
===========================================
|
|
||||||
|
|
||||||
To create a development environment, you can use DevStack. DevStack is
|
|
||||||
essentially a collection of shell scripts and configuration files that
|
|
||||||
builds an OpenStack development environment for you. You use it to
|
|
||||||
create such an environment for developing a new feature.
|
|
||||||
|
|
||||||
For more information on installing DevStack, see the
|
|
||||||
`DevStack <https://docs.openstack.org/developer/devstack/>`_ website.
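
As a quick illustration, a minimal ``local.conf`` placed in the DevStack
directory (the password is a placeholder) is enough for a basic
environment; running ``./stack.sh`` with this file in place then builds
it:

.. code-block:: ini

   [[local|localrc]]
   ADMIN_PASSWORD=devstack
   DATABASE_PASSWORD=$ADMIN_PASSWORD
   RABBIT_PASSWORD=$ADMIN_PASSWORD
   SERVICE_PASSWORD=$ADMIN_PASSWORD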
|
|
@ -1,341 +0,0 @@
|
|||||||
=============================================
|
|
||||||
Customizing Object Storage (Swift) Middleware
|
|
||||||
=============================================
|
|
||||||
|
|
||||||
OpenStack Object Storage, known as swift when reading the code, is based
|
|
||||||
on the Python `Paste <http://pythonpaste.org/>`_ framework. The best
|
|
||||||
introduction to its architecture is `A Do-It-Yourself
|
|
||||||
Framework <http://pythonpaste.org/do-it-yourself-framework.html>`_.
|
|
||||||
Because of the swift project's use of this framework, you are able to
|
|
||||||
add features to a project by placing some custom code in a project's
|
|
||||||
pipeline without having to change any of the core code.
|
|
||||||
|
|
||||||
Imagine a scenario where you have public access to one of your
containers, but what you really want is to restrict access to a
whitelisted set of IP addresses. In this example, we'll create a piece
of middleware for swift that allows access to a container from only a
set of IP addresses, as determined by the container's metadata items.
Only those IP addresses that you explicitly whitelist using the
container's metadata will be able to access the container.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
This example is for illustrative purposes only. It should not be
|
|
||||||
used as a container IP whitelist solution without further
|
|
||||||
development and extensive security testing.
|
|
||||||
|
|
||||||
When you join the screen session that ``stack.sh`` starts with
|
|
||||||
``screen -r stack``, you see a screen for each service running, which
|
|
||||||
can be a few or several, depending on how many services you configured
|
|
||||||
DevStack to run.
|
|
||||||
|
|
||||||
The asterisk * indicates which screen window you are viewing. This
|
|
||||||
example shows we are viewing the key (for keystone) screen window:
|
|
||||||
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
0$ shell 1$ key* 2$ horizon 3$ s-proxy 4$ s-object 5$ s-container 6$ s-account
|
|
||||||
|
|
||||||
The purpose of each screen window is as follows:
|
|
||||||
|
|
||||||
|
|
||||||
``shell``
|
|
||||||
A shell where you can get some work done
|
|
||||||
|
|
||||||
``key*``
|
|
||||||
The keystone service
|
|
||||||
|
|
||||||
``horizon``
|
|
||||||
The horizon dashboard web application
|
|
||||||
|
|
||||||
``s-{name}``
|
|
||||||
The swift services
|
|
||||||
|
|
||||||
**To create the middleware and plug it in through Paste configuration:**
|
|
||||||
|
|
||||||
All of the code for OpenStack lives in ``/opt/stack``. Go to the swift
|
|
||||||
directory in the ``shell`` screen and edit your middleware module.
|
|
||||||
|
|
||||||
#. Change to the directory where Object Storage is installed:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ cd /opt/stack/swift
|
|
||||||
|
|
||||||
#. Create the ``ip_whitelist.py`` Python source code file:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ vim swift/common/middleware/ip_whitelist.py
|
|
||||||
|
|
||||||
#. Copy the code as shown below into ``ip_whitelist.py``.
|
|
||||||
The following code is a middleware example that
|
|
||||||
restricts access to a container based on IP address as explained at the
|
|
||||||
beginning of the section. Middleware passes the request on to another
|
|
||||||
application. This example uses the swift "swob" library to wrap Web
|
|
||||||
Server Gateway Interface (WSGI) requests and responses into objects for
|
|
||||||
swift to interact with. When you're done, save and close the file.
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
||||||
# Copyright (c) 2014 OpenStack Foundation
|
|
||||||
# All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
||||||
# not use this file except in compliance with the License. You may obtain
|
|
||||||
# a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
||||||
# License for the specific language governing permissions and limitations
|
|
||||||
# under the License.
|
|
||||||
|
|
||||||
import socket
|
|
||||||
|
|
||||||
from swift.common.utils import get_logger
|
|
||||||
from swift.proxy.controllers.base import get_container_info
|
|
||||||
from swift.common.swob import Request, Response
|
|
||||||
|
|
||||||
class IPWhitelistMiddleware(object):
|
|
||||||
"""
|
|
||||||
IP Whitelist Middleware
|
|
||||||
|
|
||||||
Middleware that allows access to a container from only a set of IP
|
|
||||||
addresses as determined by the container's metadata items that start
|
|
||||||
with the prefix 'allow'. E.G. allow-dev=192.168.0.20
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, app, conf, logger=None):
|
|
||||||
self.app = app
|
|
||||||
|
|
||||||
if logger:
|
|
||||||
self.logger = logger
|
|
||||||
else:
|
|
||||||
self.logger = get_logger(conf, log_route='ip_whitelist')
|
|
||||||
|
|
||||||
self.deny_message = conf.get('deny_message', "IP Denied")
|
|
||||||
self.local_ip = socket.gethostbyname(socket.gethostname())
|
|
||||||
|
|
||||||
def __call__(self, env, start_response):
|
|
||||||
"""
|
|
||||||
WSGI entry point.
|
|
||||||
Wraps env in swob.Request object and passes it down.
|
|
||||||
|
|
||||||
:param env: WSGI environment dictionary
|
|
||||||
:param start_response: WSGI callable
|
|
||||||
"""
|
|
||||||
req = Request(env)
|
|
||||||
|
|
||||||
try:
|
|
||||||
version, account, container, obj = req.split_path(1, 4, True)
|
|
||||||
except ValueError:
|
|
||||||
return self.app(env, start_response)
|
|
||||||
|
|
||||||
container_info = get_container_info(
|
|
||||||
req.environ, self.app, swift_source='IPWhitelistMiddleware')
|
|
||||||
|
|
||||||
remote_ip = env['REMOTE_ADDR']
|
|
||||||
self.logger.debug("Remote IP: %(remote_ip)s",
|
|
||||||
{'remote_ip': remote_ip})
|
|
||||||
|
|
||||||
meta = container_info['meta']
|
|
||||||
allow = {k:v for k,v in meta.iteritems() if k.startswith('allow')}
|
|
||||||
allow_ips = set(allow.values())
|
|
||||||
allow_ips.add(self.local_ip)
|
|
||||||
self.logger.debug("Allow IPs: %(allow_ips)s",
|
|
||||||
{'allow_ips': allow_ips})
|
|
||||||
|
|
||||||
if remote_ip in allow_ips:
|
|
||||||
return self.app(env, start_response)
|
|
||||||
else:
|
|
||||||
self.logger.debug(
|
|
||||||
"IP %(remote_ip)s denied access to Account=%(account)s "
|
|
||||||
"Container=%(container)s. Not in %(allow_ips)s", locals())
|
|
||||||
return Response(
|
|
||||||
status=403,
|
|
||||||
body=self.deny_message,
|
|
||||||
request=req)(env, start_response)
|
|
||||||
|
|
||||||
|
|
||||||
def filter_factory(global_conf, **local_conf):
|
|
||||||
"""
|
|
||||||
paste.deploy app factory for creating WSGI proxy apps.
|
|
||||||
"""
|
|
||||||
conf = global_conf.copy()
|
|
||||||
conf.update(local_conf)
|
|
||||||
|
|
||||||
def ip_whitelist(app):
|
|
||||||
return IPWhitelistMiddleware(app, conf)
|
|
||||||
return ip_whitelist
|
|
||||||
|
|
||||||
|
|
||||||
There is a lot of useful information in ``env`` and ``conf`` that you
|
|
||||||
can use to decide what to do with the request. To find out more about
|
|
||||||
what properties are available, you can insert the following log
|
|
||||||
statement into the ``__init__`` method:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
self.logger.debug("conf = %(conf)s", locals())
|
|
||||||
|
|
||||||
|
|
||||||
and the following log statement into the ``__call__`` method:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
self.logger.debug("env = %(env)s", locals())
|
|
||||||
|
|
||||||
#. To plug this middleware into the swift Paste pipeline, you edit one
|
|
||||||
configuration file, ``/etc/swift/proxy-server.conf``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ vim /etc/swift/proxy-server.conf
|
|
||||||
|
|
||||||
#. Find the ``[filter:ratelimit]`` section in
|
|
||||||
``/etc/swift/proxy-server.conf``, and copy in the following
|
|
||||||
configuration section after it:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
[filter:ip_whitelist]
|
|
||||||
paste.filter_factory = swift.common.middleware.ip_whitelist:filter_factory
|
|
||||||
# You can override the default log routing for this filter here:
|
|
||||||
# set log_name = ratelimit
|
|
||||||
# set log_facility = LOG_LOCAL0
|
|
||||||
# set log_level = INFO
|
|
||||||
# set log_headers = False
|
|
||||||
# set log_address = /dev/log
|
|
||||||
deny_message = You shall not pass!
|
|
||||||
|
|
||||||
#. Find the ``[pipeline:main]`` section in
|
|
||||||
``/etc/swift/proxy-server.conf``, and add ``ip_whitelist`` after
|
|
||||||
ratelimit to the list like so. When you're done, save and close the
|
|
||||||
file:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
[pipeline:main]
|
|
||||||
pipeline = catch_errors gatekeeper healthcheck proxy-logging cache bulk tempurl ratelimit ip_whitelist ...
|
|
||||||
|
|
||||||
#. Restart the ``swift proxy`` service to make swift use your middleware.
|
|
||||||
Start by switching to the ``swift-proxy`` screen:
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **3**.
|
|
||||||
|
|
||||||
#. Press **Ctrl+C** to kill the service.
|
|
||||||
|
|
||||||
#. Press **Up Arrow** to bring up the last command.
|
|
||||||
|
|
||||||
#. Press Enter to run it.
|
|
||||||
|
|
||||||
#. Test your middleware with the ``swift`` CLI. Start by switching to the
|
|
||||||
shell screen and finish by switching back to the ``swift-proxy`` screen
|
|
||||||
to check the log output:
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **0**.
|
|
||||||
|
|
||||||
#. Make sure you're in the ``devstack`` directory:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ cd /root/devstack
|
|
||||||
|
|
||||||
#. Source openrc to set up your environment variables for the CLI:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ . openrc
|
|
||||||
|
|
||||||
#. Create a container called ``middleware-test``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift post middleware-test
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **3** to check the log output.
|
|
||||||
|
|
||||||
#. Among the log statements you'll see the lines:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
proxy-server Remote IP: my.instance.ip.address (txn: ...)
|
|
||||||
proxy-server Allow IPs: set(['my.instance.ip.address']) (txn: ...)
|
|
||||||
|
|
||||||
These two statements are produced by our middleware and show that the
|
|
||||||
request was sent from our DevStack instance and was allowed.
|
|
||||||
|
|
||||||
#. Test the middleware from outside DevStack on a remote machine that has
|
|
||||||
access to your DevStack instance:
|
|
||||||
|
|
||||||
#. Install the ``keystone`` and ``swift`` clients on your local machine:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# pip install python-keystoneclient python-swiftclient
|
|
||||||
|
|
||||||
#. Attempt to list the objects in the ``middleware-test`` container:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift --os-auth-url=http://my.instance.ip.address:5000/v2.0/ \
|
|
||||||
--os-region-name=RegionOne --os-username=demo:demo \
|
|
||||||
--os-password=devstack list middleware-test
|
|
||||||
Container GET failed: http://my.instance.ip.address:8080/v1/AUTH_.../
|
|
||||||
middleware-test?format=json 403 Forbidden You shall not pass!
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **3** to check the log output. Look at the
|
|
||||||
swift log statements again, and among the log statements, you'll see the
|
|
||||||
lines:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
proxy-server Authorizing from an overriding middleware (i.e: tempurl) (txn: ...)
|
|
||||||
proxy-server ... IPWhitelistMiddleware
|
|
||||||
proxy-server Remote IP: my.local.ip.address (txn: ...)
|
|
||||||
proxy-server Allow IPs: set(['my.instance.ip.address']) (txn: ...)
|
|
||||||
proxy-server IP my.local.ip.address denied access to Account=AUTH_... \
|
|
||||||
Container=None. Not in set(['my.instance.ip.address']) (txn: ...)
|
|
||||||
|
|
||||||
Here we can see that the request was denied because the remote IP
|
|
||||||
address wasn't in the set of allowed IPs.
|
|
||||||
|
|
||||||
#. Back in your DevStack instance on the shell screen, add some metadata to
|
|
||||||
your container to allow the request from the remote machine:
|
|
||||||
|
|
||||||
#. Press **Ctrl+A** followed by **0**.
|
|
||||||
|
|
||||||
#. Add metadata to the container to allow the IP:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift post --meta allow-dev:my.local.ip.address middleware-test
|
|
||||||
|
|
||||||
#. Now try the command from Step 10 again and it succeeds. There are no
|
|
||||||
objects in the container, so there is nothing to list; however, there is
|
|
||||||
also no error to report.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Functional testing like this is not a replacement for proper unit
|
|
||||||
and integration testing, but it serves to get you started.
|
|
||||||
|
|
||||||
You can follow a similar pattern in other projects that use the Python
|
|
||||||
Paste framework. Simply create a middleware module and plug it in
|
|
||||||
through configuration. The middleware runs in sequence as part of that
|
|
||||||
project's pipeline and can call out to other services as necessary. No
|
|
||||||
project core code is touched. Look for a ``pipeline`` value in the
|
|
||||||
project's ``conf`` or ``ini`` configuration files in ``/etc/<project>``
|
|
||||||
to identify projects that use Paste.
|
|
||||||
|
|
||||||
When your middleware is done, we encourage you to open source it and let
|
|
||||||
the community know on the OpenStack mailing list. Perhaps others need
|
|
||||||
the same functionality. They can use your code, provide feedback, and
|
|
||||||
possibly contribute. If enough support exists for it, perhaps you can
|
|
||||||
propose that it be added to the official swift
|
|
||||||
`middleware <https://git.openstack.org/cgit/openstack/swift/tree/swift/common/middleware>`_.
|
|
@ -1,12 +0,0 @@
|
|||||||
=====================
|
|
||||||
Provision an instance
|
|
||||||
=====================
|
|
||||||
|
|
||||||
To help understand how OpenStack works, this section describes the
|
|
||||||
end-to-end process and interaction of components when provisioning an instance
|
|
||||||
on OpenStack.
|
|
||||||
|
|
||||||
**Provision an instance**
|
|
||||||
|
|
||||||
.. figure:: figures/provision-an-instance.png
|
|
||||||
:width: 100%
|
|
@ -1,45 +0,0 @@
|
|||||||
=============
|
|
||||||
Customization
|
|
||||||
=============
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 1
|
|
||||||
|
|
||||||
ops-customize-provision-instance.rst
|
|
||||||
ops-customize-development.rst
|
|
||||||
ops-customize-objectstorage.rst
|
|
||||||
ops-customize-compute.rst
|
|
||||||
ops-customize-dashboard.rst
|
|
||||||
ops-customize-conclusion.rst
|
|
||||||
|
|
||||||
OpenStack might not do everything you need it to do out of the box. To
|
|
||||||
add a new feature, you can follow different paths.
|
|
||||||
|
|
||||||
To take the first path, you can modify the OpenStack code directly.
|
|
||||||
Learn `how to contribute
|
|
||||||
<https://wiki.openstack.org/wiki/How_To_Contribute>`_,
|
|
||||||
follow the `Developer's Guide
|
|
||||||
<https://docs.openstack.org/infra/manual/developers.html>`_, make your
|
|
||||||
changes, and contribute them back to the upstream OpenStack project.
|
|
||||||
This path is recommended if the feature you need requires deep
|
|
||||||
integration with an existing project. The community is always open to
|
|
||||||
contributions and welcomes new functionality that follows the
|
|
||||||
feature-development guidelines. This path still requires you to use
|
|
||||||
DevStack for testing your feature additions, so this chapter walks you
|
|
||||||
through the DevStack environment.
|
|
||||||
|
|
||||||
For the second path, you can write new features and plug them in using
|
|
||||||
changes to a configuration file. If the project where your feature would
|
|
||||||
need to reside uses the Python Paste framework, you can create
|
|
||||||
middleware for it and plug it in through configuration. There may also
|
|
||||||
be specific ways of customizing a project, such as creating a new
|
|
||||||
scheduler driver for Compute or a custom tab for the dashboard.
|
|
||||||
|
|
||||||
This chapter focuses on the second path for customizing OpenStack by
|
|
||||||
providing two examples for writing new features.
|
|
||||||
The first example shows how to modify Object Storage service (swift)
|
|
||||||
middleware to add a new feature, and the second example provides a new
|
|
||||||
scheduler feature for Compute service (nova).
|
|
||||||
To customize OpenStack this way you need a development environment.
|
|
||||||
The best way to get an environment up and running quickly is to run
|
|
||||||
DevStack within your cloud.
|
|
@ -1,299 +0,0 @@
|
|||||||
.. _legal-requirements:
|
|
||||||
|
|
||||||
======================================
|
|
||||||
Factors affecting OpenStack deployment
|
|
||||||
======================================
|
|
||||||
|
|
||||||
Security requirements
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
When deploying OpenStack in an enterprise as a private cloud, it is
|
|
||||||
usually behind the firewall and within the trusted network alongside
|
|
||||||
existing systems. Users are employees that are bound by the
|
|
||||||
company security requirements. This tends to drive most of the security
|
|
||||||
domains towards a more trusted model. However, when deploying OpenStack
|
|
||||||
in a public facing role, no assumptions can be made and the attack vectors
|
|
||||||
significantly increase.
|
|
||||||
|
|
||||||
Consider the following security implications and requirements:
|
|
||||||
|
|
||||||
* Managing the users for both public and private clouds. The Identity service
|
|
||||||
allows for LDAP to be part of the authentication process. This may ease user
|
|
||||||
management if integrating into existing systems.
|
|
||||||
|
|
||||||
* User authentication requests include sensitive information including
|
|
||||||
usernames, passwords, and authentication tokens. It is strongly recommended
|
|
||||||
to place API services behind hardware that performs SSL termination.
|
|
||||||
|
|
||||||
* Negative or hostile users who would attack or compromise the security
|
|
||||||
of your deployment regardless of firewalls or security agreements.
|
|
||||||
|
|
||||||
* Attack vectors increase further in a public facing OpenStack deployment.
|
|
||||||
For example, the API endpoints and the software behind it become
|
|
||||||
vulnerable to hostile entities attempting to gain unauthorized access
|
|
||||||
or prevent access to services. You should provide appropriate filtering and
|
|
||||||
periodic security auditing.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Be mindful of consistency when utilizing third party
|
|
||||||
clouds to explore authentication options.
|
|
||||||
|
|
||||||
For more information on OpenStack security, see the `OpenStack Security
Guide <https://docs.openstack.org/security-guide/>`_.
|
|
||||||
|
|
||||||
Security domains
|
|
||||||
----------------
|
|
||||||
|
|
||||||
A security domain comprises users, applications, servers, or networks
that share common trust requirements and expectations within a system.
Typically, they have the same authentication and authorization
requirements and users.
|
|
||||||
|
|
||||||
Security domains include:
|
|
||||||
|
|
||||||
Public security domains
|
|
||||||
The public security domain can refer to the internet as a whole or
|
|
||||||
networks over which you have no authority. This domain is considered
|
|
||||||
untrusted. For example, in a hybrid cloud deployment, any information
|
|
||||||
traversing between and beyond the clouds is in the public domain and
|
|
||||||
untrustworthy.
|
|
||||||
|
|
||||||
Guest security domains
|
|
||||||
The guest security domain handles compute data generated by instances
|
|
||||||
on the cloud, but not services that support the operation of the
|
|
||||||
cloud, such as API calls. Public cloud providers and private cloud
|
|
||||||
providers who do not have stringent controls on instance use or who
|
|
||||||
allow unrestricted internet access to instances should consider this
|
|
||||||
domain to be untrusted. Private cloud providers may want to consider
|
|
||||||
this network as internal and therefore trusted only if they have
|
|
||||||
controls in place to assert that they trust instances and all their
|
|
||||||
tenants.
|
|
||||||
|
|
||||||
Management security domains
|
|
||||||
The management security domain is where services interact. Sometimes
|
|
||||||
referred to as the control plane, the networks in this domain
|
|
||||||
transport confidential data such as configuration parameters, user
|
|
||||||
names, and passwords. In most deployments this domain is considered
|
|
||||||
trusted when it is behind an organization's firewall.
|
|
||||||
|
|
||||||
Data security domains
|
|
||||||
The data security domain is primarily concerned with information
|
|
||||||
pertaining to the storage services within OpenStack. The data
|
|
||||||
that crosses this network has high integrity and confidentiality
|
|
||||||
requirements and, depending on the type of deployment, may also have
|
|
||||||
strong availability requirements. The trust level of this network is
|
|
||||||
heavily dependent on other deployment decisions.
|
|
||||||
|
|
||||||
These security domains can be individually or collectively mapped to an
|
|
||||||
OpenStack deployment. The cloud operator should be aware of the appropriate
|
|
||||||
security concerns. Security domains should be mapped out against your specific
|
|
||||||
OpenStack deployment topology. The domains and their trust requirements depend
|
|
||||||
upon whether the cloud instance is public, private, or hybrid.
|
|
||||||
|
|
||||||
Hypervisor security
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
The hypervisor also requires a security assessment. In a
|
|
||||||
public cloud, organizations typically do not have control
|
|
||||||
over the choice of hypervisor. Properly securing your
|
|
||||||
hypervisor is important. Attacks made upon the
|
|
||||||
unsecured hypervisor are called a **hypervisor breakout**.
|
|
||||||
Hypervisor breakout describes the event of a
|
|
||||||
compromised or malicious instance breaking out of the resource
|
|
||||||
controls of the hypervisor and gaining access to the bare
|
|
||||||
metal operating system and hardware resources.
|
|
||||||
|
|
||||||
Hypervisor security is not an issue if the security of instances is not
|
|
||||||
important. However, enterprises can minimize vulnerability by avoiding
|
|
||||||
hardware sharing with others in a public cloud.
|
|
||||||
|
|
||||||
Baremetal security
|
|
||||||
------------------
|
|
||||||
|
|
||||||
There are other services worth considering that provide a
bare metal instance instead of a cloud. In other cases, it is
possible to replicate a second private cloud by integrating
with a private Cloud-as-a-Service deployment. The
organization does not buy the hardware, but it also does not
share it with other tenants. It is also possible to use a provider
that hosts a bare-metal public cloud instance for which the
hardware is dedicated only to one customer, or a provider that
offers private Cloud-as-a-Service.
|
|
||||||
|
|
||||||
.. important::
|
|
||||||
|
|
||||||
Each cloud implements services differently. Understand the security
|
|
||||||
requirements of every cloud that handles the organization's data or
|
|
||||||
workloads.
|
|
||||||
|
|
||||||
Networking security
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
Consider security implications and requirements before designing the
|
|
||||||
physical and logical network topologies. Make sure that the networks are
|
|
||||||
properly segregated and traffic flows are going to the correct
|
|
||||||
destinations without crossing through locations that are undesirable.
|
|
||||||
Consider the following factors:
|
|
||||||
|
|
||||||
* Firewalls
|
|
||||||
* Overlay interconnects for joining separated tenant networks
|
|
||||||
* Routing through or avoiding specific networks
|
|
||||||
|
|
||||||
How networks attach to hypervisors can expose security
|
|
||||||
vulnerabilities. To mitigate hypervisor breakouts, separate networks
|
|
||||||
from other systems and schedule instances for the
|
|
||||||
network onto dedicated Compute nodes. This prevents attackers
|
|
||||||
from having access to the networks from a compromised instance.
|
|
||||||
|
|
||||||
Multi-site security
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
Securing a multi-site OpenStack installation brings
|
|
||||||
several challenges. Tenants may expect a tenant-created network
|
|
||||||
to be secure. In a multi-site installation the use of a
|
|
||||||
non-private connection between sites may be required. This may
|
|
||||||
mean that traffic would be visible to third parties and, in
|
|
||||||
cases where an application requires security, this issue
|
|
||||||
requires mitigation. In these instances, install a VPN or
|
|
||||||
encrypted connection between sites to conceal sensitive traffic.
|
|
||||||
|
|
||||||
Identity is another security consideration. Authentication
|
|
||||||
centralization provides a single authentication point for
|
|
||||||
users across the deployment, and a single administration point
|
|
||||||
for traditional create, read, update, and delete operations.
|
|
||||||
Centralized authentication is also useful for auditing purposes because
|
|
||||||
all authentication tokens originate from the same source.
|
|
||||||
|
|
||||||
Tenants in multi-site installations need isolation
from each other. The main challenge is ensuring tenant networks
function across regions, which is not currently supported in OpenStack
Networking (neutron). Therefore, an external system may be required
to manage mappings. Tenant networks may contain sensitive information
requiring accurate and consistent mapping to ensure that a tenant in
one site does not connect to a different tenant in another site.
|
|
||||||
|
|
||||||
Legal requirements
~~~~~~~~~~~~~~~~~~

Using remote resources for collection, processing, storage,
and retrieval provides potential benefits to businesses.
With the rapid growth of data within organizations, businesses
need to be proactive about their data storage strategies from
a compliance point of view.

Most countries have legislative and regulatory requirements governing
the storage and management of data in cloud environments. This is
particularly relevant for public, community and hybrid cloud models,
to ensure data privacy and protection for organizations using a
third party cloud provider.

Common areas of regulation include:

* Data retention policies ensuring storage of persistent data
  and records management to meet data archival requirements.
* Data ownership policies governing the possession and
  responsibility for data.
* Data sovereignty policies governing the storage of data in
  foreign countries or otherwise separate jurisdictions.
* Data compliance policies governing certain types of
  information needing to reside in certain locations due to
  regulatory issues - and more importantly, cannot reside in
  other locations for the same reason.
* Data location policies ensuring that the services deployed
  to the cloud are used according to laws and regulations in place
  for the employees, foreign subsidiaries, or third parties.
* Disaster recovery policies ensuring regular data backups and
  relocation of cloud applications to another supplier in scenarios
  where a provider may go out of business, or their data center could
  become inoperable.
* Security breach policies governing the ways to notify individuals
  through cloud provider's systems or other means if their personal
  data gets compromised in any way.
* Industry standards policy governing additional requirements on what
  type of cardholder data may or may not be stored and how it is to
  be protected.

Examples of such legal frameworks include:

Data storage regulations in Europe are currently driven by provisions of
the `Data protection framework <http://ec.europa.eu/justice/data-protection/>`_.
The `Financial Industry Regulatory Authority
<http://www.finra.org/Industry/Regulation/FINRARules/>`_ works on this in
the United States.

Privacy and security are spread over different industry-specific laws and
regulations:

* Health Insurance Portability and Accountability Act (HIPAA)
* Gramm-Leach-Bliley Act (GLBA)
* Payment Card Industry Data Security Standard (PCI DSS)
* Family Educational Rights and Privacy Act (FERPA)

Cloud security architecture
~~~~~~~~~~~~~~~~~~~~~~~~~~~

Cloud security architecture should recognize the issues
that arise with security management, which addresses these issues
with security controls. Cloud security controls are put in place to
safeguard any weaknesses in the system, and reduce the effect of an attack.

The following categories of security controls are commonly used:

Deterrent controls:
   Typically reduce the threat level by informing potential attackers
   that there will be adverse consequences for them if they proceed.

Preventive controls:
   Strengthen the system against incidents, generally by reducing
   if not actually eliminating vulnerabilities.

Detective controls:
   Intended to detect and react appropriately to any incidents
   that occur. System and network security monitoring, including
   intrusion detection and prevention arrangements, are typically
   employed to detect attacks on cloud systems and the supporting
   communications infrastructure.

Corrective controls:
   Reduce the consequences of an incident, normally by limiting
   the damage. They come into effect during or after an incident.
   Restoring system backups in order to rebuild a compromised
   system is an example of a corrective control.

For more information, see `NIST Special Publication 800-53
<https://web.nvd.nist.gov/view/800-53/home>`_.

Software licensing
~~~~~~~~~~~~~~~~~~

The many different forms of license agreements for software are often written
with the use of dedicated hardware in mind. This model is relevant for the
cloud platform itself, including the hypervisor operating system, supporting
software for items such as database, RPC, backup, and so on. Consideration
must be made when offering Compute service instances and applications to end
users of the cloud, since the license terms for that software may need some
adjustment to be able to operate economically in the cloud.

Multi-site OpenStack deployments present additional licensing
considerations over and above regular OpenStack clouds, particularly
where site licenses are in use to provide cost efficient access to
software licenses. The licensing for host operating systems, guest
operating systems, OpenStack distributions (if applicable),
software-defined infrastructure including network controllers and
storage systems, and even individual applications needs to be evaluated.

Topics to consider include:

* The definition of what constitutes a site in the relevant licenses,
  as the term does not necessarily denote a geographic or otherwise
  physically isolated location.

* Differentiations between "hot" (active) and "cold" (inactive) sites,
  where significant savings may be made in situations where one site is
  a cold standby for disaster recovery purposes only.

* Certain locations might require local vendors to provide support and
  services for each site, which may vary with the licensing agreement in
  place.
@ -1,602 +0,0 @@
===============
Lay of the Land
===============

This chapter helps you set up your working environment and use it to
take a look around your cloud.

Using the OpenStack Dashboard for Administration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

As a cloud administrative user, you can use the OpenStack dashboard to
create and manage projects, users, images, and flavors. Users are
allowed to create and manage images within specified projects and to
share images, depending on the Image service configuration. Typically,
the policy configuration allows admin users only to set quotas and
create and manage services. The dashboard provides an :guilabel:`Admin`
tab with a :guilabel:`System Panel` and an :guilabel:`Identity` tab.
These interfaces give you access to system information and usage as
well as to settings for configuring what
end users can do. Refer to the `OpenStack Administrator
Guide <https://docs.openstack.org/admin-guide/dashboard.html>`__ for
detailed how-to information about using the dashboard as an admin user.

Command-Line Tools
~~~~~~~~~~~~~~~~~~

We recommend using a combination of the OpenStack command-line interface
(CLI) tools and the OpenStack dashboard for administration. Some users
with a background in other cloud technologies may be using the EC2
Compatibility API, which uses naming conventions somewhat different from
the native API.

The pip utility is used to manage package installation from the PyPI
archive and is available in the python-pip package in most Linux
distributions. While each OpenStack project has its own client, they are
being deprecated in favour of a common OpenStack client. It is generally
recommended to install the OpenStack client.

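
A quick way to get the common client, assuming pip is already available, is
to install the ``python-openstackclient`` package from PyPI (the exact
package name and any version pinning depend on your distribution and
OpenStack release):

.. code-block:: console

   $ pip install python-openstackclient
   $ openstack --version
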
.. tip::

   To perform testing and orchestration, it is usually easier to install the
   OpenStack CLI tools in a dedicated VM in the cloud. We recommend
   that you keep the VM installation simple. All the tools should be installed
   from a single OpenStack release version. If you need to run tools from
   multiple OpenStack releases, then we recommend that you run with multiple
   VMs that are each running a dedicated version.

Install OpenStack command-line clients
--------------------------------------

For instructions on installing, upgrading, or removing command-line clients,
see the `Install the OpenStack command-line clients
<https://docs.openstack.org/user-guide/common/cli-install-openstack-command-line-clients.html>`_
section in the OpenStack End User Guide.

.. note::

   If you support the EC2 API on your cloud, you should also install the
   euca2ools package or some other EC2 API tool so that you can get the
   same view your users have. Using EC2 API-based tools is mostly out of
   the scope of this guide, though we discuss getting credentials for use
   with it.
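
   For example, on an Ubuntu or Debian based client machine, the package can
   usually be installed with the distribution package manager (the package
   name and tooling may differ on other distributions):

   .. code-block:: console

      # apt-get install euca2ools
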
Administrative Command-Line Tools
---------------------------------

There are also several :command:`*-manage` command-line tools. These are
installed with the project's services on the cloud controller and do not
need to be installed separately:

* :command:`nova-manage`
* :command:`glance-manage`
* :command:`keystone-manage`
* :command:`cinder-manage`

Unlike the CLI tools mentioned above, the :command:`*-manage` tools must
be run from the cloud controller, as root, because they need read access
to the config files such as ``/etc/nova/nova.conf`` and to make queries
directly against the database rather than against the OpenStack
:term:`API endpoints <API endpoint>`.

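
As a quick illustration, the following is run as root on the cloud
controller and queries the database directly through one of these tools
(the same command is used later in this chapter to list cinder hosts):

.. code-block:: console

   # cinder-manage host list
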
.. warning::

   The existence of the ``*-manage`` tools is a legacy issue. It is a
   goal of the OpenStack project to eventually migrate all of the
   remaining functionality in the ``*-manage`` tools into the API-based
   tools. Until that day, you need to SSH into the
   :term:`cloud controller node` to perform some maintenance operations
   that require one of the ``*-manage`` tools.

Getting Credentials
-------------------

You must have the appropriate credentials if you want to use the
command-line tools to make queries against your OpenStack cloud. By far,
the easiest way to obtain :term:`authentication` credentials to use with
command-line clients is to use the OpenStack dashboard. Select
:guilabel:`Project`, click the :guilabel:`Project` tab, and click
:guilabel:`Access & Security` on the :guilabel:`Compute` category.
On the :guilabel:`Access & Security` page, click the :guilabel:`API Access`
tab to display two buttons, :guilabel:`Download OpenStack RC File` and
:guilabel:`Download EC2 Credentials`, which let you generate files that
you can source in your shell to populate the environment variables the
command-line tools require to know where your service endpoints and your
authentication information are. The user you are logged in to the dashboard
as determines the filename of the openrc file, such as ``demo-openrc.sh``.
When logged in as admin, the file is named ``admin-openrc.sh``.

The generated file looks something like this:

.. code-block:: bash

   #!/usr/bin/env bash

   # To use an OpenStack cloud you need to authenticate against the Identity
   # service named keystone, which returns a **Token** and **Service Catalog**.
   # The catalog contains the endpoints for all services the user/tenant has
   # access to - such as Compute, Image Service, Identity, Object Storage, Block
   # Storage, and Networking (code-named nova, glance, keystone, swift,
   # cinder, and neutron).
   #
   # *NOTE*: Using the 3 *Identity API* does not necessarily mean any other
   # OpenStack API is version 3. For example, your cloud provider may implement
   # Image API v1.1, Block Storage API v2, and Compute API v2.0. OS_AUTH_URL is
   # only for the Identity API served through keystone.
   export OS_AUTH_URL=http://203.0.113.10:5000/v3

   # With the addition of Keystone we have standardized on the term **project**
   # as the entity that owns the resources.
   export OS_PROJECT_ID=98333aba48e756fa8f629c83a818ad57
   export OS_PROJECT_NAME="test-project"
   export OS_USER_DOMAIN_NAME="default"
   if [ -z "$OS_USER_DOMAIN_NAME" ]; then unset OS_USER_DOMAIN_NAME; fi

   # In addition to the owning entity (tenant), OpenStack stores the entity
   # performing the action as the **user**.
   export OS_USERNAME="demo"

   # With Keystone you pass the keystone password.
   echo "Please enter your OpenStack Password for project $OS_PROJECT_NAME as user $OS_USERNAME: "
   read -sr OS_PASSWORD_INPUT
   export OS_PASSWORD=$OS_PASSWORD_INPUT

   # If your configuration has multiple regions, we set that information here.
   # OS_REGION_NAME is optional and only valid in certain environments.
   export OS_REGION_NAME="RegionOne"
   # Don't leave a blank variable, unset it if it was empty
   if [ -z "$OS_REGION_NAME" ]; then unset OS_REGION_NAME; fi

   export OS_INTERFACE=public
   export OS_IDENTITY_API_VERSION=3

.. warning::

   This does not save your password in plain text, which is a good
   thing. But when you source or run the script, it prompts you for
   your password and then stores your response in the environment
   variable ``OS_PASSWORD``. It is important to note that this does
   require interactivity. It is possible to store a value directly in
   the script if you require a noninteractive operation, but you then
   need to be extremely cautious with the security and permissions of
   this file.

EC2 compatibility credentials can be downloaded by selecting
:guilabel:`Project`, then :guilabel:`Compute`, then
:guilabel:`Access & Security`, then :guilabel:`API Access` to display the
:guilabel:`Download EC2 Credentials` button. Click the button to generate
a ZIP file with server x509 certificates and a shell script fragment.
Create a new directory in a secure location because these are live credentials
containing all the authentication information required to access your
cloud identity, unlike the default ``user-openrc``. Extract the ZIP file
here. You should have ``cacert.pem``, ``cert.pem``, ``ec2rc.sh``, and
``pk.pem``. The ``ec2rc.sh`` is similar to this:

.. code-block:: bash

   #!/bin/bash

   NOVARC=$(readlink -f "${BASH_SOURCE:-${0}}" 2>/dev/null) ||\
       NOVARC=$(python -c 'import os,sys; \
       print os.path.abspath(os.path.realpath(sys.argv[1]))' "${BASH_SOURCE:-${0}}")
   NOVA_KEY_DIR=${NOVARC%/*}
   export EC2_ACCESS_KEY=df7f93ec47e84ef8a347bbb3d598449a
   export EC2_SECRET_KEY=ead2fff9f8a344e489956deacd47e818
   export EC2_URL=http://203.0.113.10:8773/services/Cloud
   export EC2_USER_ID=42 # nova does not use user id, but bundling requires it
   export EC2_PRIVATE_KEY=${NOVA_KEY_DIR}/pk.pem
   export EC2_CERT=${NOVA_KEY_DIR}/cert.pem
   export NOVA_CERT=${NOVA_KEY_DIR}/cacert.pem
   export EUCALYPTUS_CERT=${NOVA_CERT} # euca-bundle-image seems to require this

   alias ec2-bundle-image="ec2-bundle-image --cert $EC2_CERT --privatekey \
       $EC2_PRIVATE_KEY --user 42 --ec2cert $NOVA_CERT"
   alias ec2-upload-bundle="ec2-upload-bundle -a $EC2_ACCESS_KEY -s \
       $EC2_SECRET_KEY --url $S3_URL --ec2cert $NOVA_CERT"

To put the EC2 credentials into your environment, source the
``ec2rc.sh`` file.

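
For example, assuming you downloaded the files for the ``demo`` user into
the current directory, you would load the variables into your shell with:

.. code-block:: console

   $ source demo-openrc.sh
   $ source ec2rc.sh
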
Inspecting API Calls
--------------------

The command-line tools can be made to show the OpenStack API calls they
make by passing the ``--debug`` flag to them. For example:

.. code-block:: console

   # openstack --debug server list

This example shows the HTTP requests from the client and the responses
from the endpoints, which can be helpful in creating custom tools
written to the OpenStack API.

Using cURL for further inspection
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Underlying the use of the command-line tools is the OpenStack API, which
is a RESTful API that runs over HTTP. There may be cases where you want
to interact with the API directly or need to use it because of a
suspected bug in one of the CLI tools. The best way to do this is to use
a combination of `cURL <http://curl.haxx.se/>`_ and another tool,
such as `jq <http://stedolan.github.io/jq/>`_, to parse the JSON from
the responses.

The first thing you must do is authenticate with the cloud using your
credentials to get an :term:`authentication token`.

Your credentials are a combination of username, password, and tenant
(project). You can extract these values from the ``openrc.sh`` discussed
above. The token allows you to interact with your other service
endpoints without needing to reauthenticate for every request. Tokens
are typically good for 24 hours, and when the token expires, you are
alerted with a 401 (Unauthorized) response and you can request another
token.

#. Look at your OpenStack service :term:`catalog`:

   .. code-block:: console

      $ curl -s -X POST http://203.0.113.10:35357/v2.0/tokens \
        -d '{"auth": {"passwordCredentials": {"username":"test-user", "password":"test-password"}, "tenantName":"test-project"}}' \
        -H "Content-type: application/json" | jq .

#. Read through the JSON response to get a feel for how the catalog is
   laid out.

   To make working with subsequent requests easier, store the token in
   an environment variable:

   .. code-block:: console

      $ TOKEN=`curl -s -X POST http://203.0.113.10:35357/v2.0/tokens \
        -d '{"auth": {"passwordCredentials": {"username":"test-user", "password":"test-password"}, "tenantName":"test-project"}}' \
        -H "Content-type: application/json" | jq -r .access.token.id`

   Now you can refer to your token on the command line as ``$TOKEN``.

#. Pick a service endpoint from your service catalog, such as compute.
   Try a request, for example, listing instances (servers):

   .. code-block:: console

      $ curl -s \
        -H "X-Auth-Token: $TOKEN" \
        http://203.0.113.10:8774/v2.0/98333aba48e756fa8f629c83a818ad57/servers | jq .

To discover how API requests should be structured, read the `OpenStack
API Reference <https://developer.openstack.org/api-guide/quick-start/index.html>`_. To chew
through the responses using jq, see the `jq
Manual <http://stedolan.github.io/jq/manual/>`_.

The ``-s`` flag used in the cURL commands above is used to prevent
the progress meter from being shown. If you are having trouble running
cURL commands, you'll want to remove it. Likewise, to help you
troubleshoot cURL commands, you can include the ``-v`` flag to show you
the verbose output. There are many more extremely useful features in
cURL; refer to the man page for all the options.

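
For instance, the server listing above could be repeated with ``-v`` in
place of ``-s`` when a request is misbehaving and you want to see the full
request and response headers:

.. code-block:: console

   $ curl -v \
     -H "X-Auth-Token: $TOKEN" \
     http://203.0.113.10:8774/v2.0/98333aba48e756fa8f629c83a818ad57/servers | jq .
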
Servers and Services
--------------------

As an administrator, you have a few ways to discover what your OpenStack
cloud looks like simply by using the OpenStack tools available. This
section gives you an idea of how to get an overview of your cloud, its
shape, size, and current state.

First, you can discover what servers belong to your OpenStack cloud by
running:

.. code-block:: console

   # openstack compute service list --long

The output looks like the following:

.. code-block:: console

   +----+------------------+-------------------+------+---------+-------+----------------------------+-----------------+
   | Id | Binary           | Host              | Zone | Status  | State | Updated_at                 | Disabled Reason |
   +----+------------------+-------------------+------+---------+-------+----------------------------+-----------------+
   | 1  | nova-cert        | cloud.example.com | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 2  | nova-compute     | c01.example.com   | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 3  | nova-compute     | c02.example.com   | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 4  | nova-compute     | c03.example.com   | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 5  | nova-compute     | c04.example.com   | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 6  | nova-compute     | c05.example.com   | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 7  | nova-conductor   | cloud.example.com | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 8  | nova-cert        | cloud.example.com | nova | enabled | up    | 2016-01-05T17:20:42.000000 | -               |
   | 9  | nova-scheduler   | cloud.example.com | nova | enabled | up    | 2016-01-05T17:20:38.000000 | -               |
   | 10 | nova-consoleauth | cloud.example.com | nova | enabled | up    | 2016-01-05T17:20:35.000000 | -               |
   +----+------------------+-------------------+------+---------+-------+----------------------------+-----------------+

The output shows that there are five compute nodes and one cloud
controller. You see all the services in the up state, which indicates that
the services are up and running. If a service is in a down state, it is
no longer available. This is an indication that you
should troubleshoot why the service is down.

If you are using cinder, run the following command to see a similar
listing:

.. code-block:: console

   # cinder-manage host list | sort
   host                        zone
   c01.example.com             nova
   c02.example.com             nova
   c03.example.com             nova
   c04.example.com             nova
   c05.example.com             nova
   cloud.example.com           nova

With these two tables, you now have a good overview of what servers and
services make up your cloud.

You can also use the Identity service (keystone) to see what services
are available in your cloud as well as what endpoints have been
configured for the services.

The following command requires you to have your shell environment
configured with the proper administrative variables:

.. code-block:: console

   $ openstack catalog list
   +----------+------------+---------------------------------------------------------------------------------+
   | Name     | Type       | Endpoints                                                                       |
   +----------+------------+---------------------------------------------------------------------------------+
   | nova     | compute    | RegionOne                                                                       |
   |          |            |   public: http://192.168.122.10:8774/v2/9faa845768224258808fc17a1bb27e5e       |
   |          |            | RegionOne                                                                       |
   |          |            |   internal: http://192.168.122.10:8774/v2/9faa845768224258808fc17a1bb27e5e     |
   |          |            | RegionOne                                                                       |
   |          |            |   admin: http://192.168.122.10:8774/v2/9faa845768224258808fc17a1bb27e5e        |
   |          |            |                                                                                 |
   | cinderv2 | volumev2   | RegionOne                                                                       |
   |          |            |   public: http://192.168.122.10:8776/v2/9faa845768224258808fc17a1bb27e5e       |
   |          |            | RegionOne                                                                       |
   |          |            |   internal: http://192.168.122.10:8776/v2/9faa845768224258808fc17a1bb27e5e     |
   |          |            | RegionOne                                                                       |
   |          |            |   admin: http://192.168.122.10:8776/v2/9faa845768224258808fc17a1bb27e5e        |
   |          |            |                                                                                 |
   +----------+------------+---------------------------------------------------------------------------------+

The preceding output has been truncated to show only two services. You
will see one service entry for each service that your cloud provides.
Note how the endpoint domain can be different depending on the endpoint
type. Different endpoint domains per type are not required, but this can
be done for different reasons, such as endpoint privacy or network
traffic segregation.

You can check the version of the installed OpenStack command-line client
by running:

.. code-block:: console

   # openstack --version

Diagnose Your Compute Nodes
---------------------------

You can obtain extra information about virtual machines that are
running—their CPU usage, the memory, the disk I/O or network I/O—per
instance, by running the :command:`nova diagnostics` command with a server ID:

.. code-block:: console

   $ nova diagnostics <serverID>

The output of this command varies depending on the hypervisor because
hypervisors support different attributes. The following demonstrates
the difference between the two most popular hypervisors.
Here is example output when the hypervisor is Xen:

.. code-block:: console

   +----------------+-----------------+
   | Property       | Value           |
   +----------------+-----------------+
   | cpu0           | 4.3627          |
   | memory         | 1171088064.0000 |
   | memory_target  | 1171088064.0000 |
   | vbd_xvda_read  | 0.0             |
   | vbd_xvda_write | 0.0             |
   | vif_0_rx       | 3223.6870       |
   | vif_0_tx       | 0.0             |
   | vif_1_rx       | 104.4955        |
   | vif_1_tx       | 0.0             |
   +----------------+-----------------+

While the command should work with any hypervisor that is controlled
through libvirt (KVM, QEMU, or LXC), it has been tested only with KVM.
Here is the example output when the hypervisor is KVM:

.. code-block:: console

   +------------------+------------+
   | Property         | Value      |
   +------------------+------------+
   | cpu0_time        | 2870000000 |
   | memory           | 524288     |
   | vda_errors       | -1         |
   | vda_read         | 262144     |
   | vda_read_req     | 112        |
   | vda_write        | 5606400    |
   | vda_write_req    | 376        |
   | vnet0_rx         | 63343      |
   | vnet0_rx_drop    | 0          |
   | vnet0_rx_errors  | 0          |
   | vnet0_rx_packets | 431        |
   | vnet0_tx         | 4905       |
   | vnet0_tx_drop    | 0          |
   | vnet0_tx_errors  | 0          |
   | vnet0_tx_packets | 45         |
   +------------------+------------+

Network Inspection
~~~~~~~~~~~~~~~~~~

To see which fixed IP networks are configured in your cloud, you can use
the :command:`openstack` command-line client to get the IP ranges:

.. code-block:: console

   $ openstack subnet list
   +--------------------------------------+----------------+--------------------------------------+-----------------+
   | ID                                   | Name           | Network                              | Subnet          |
   +--------------------------------------+----------------+--------------------------------------+-----------------+
   | 346806ee-a53e-44fd-968a-ddb2bcd2ba96 | public_subnet  | 0bf90de6-fc0f-4dba-b80d-96670dfb331a | 172.24.4.224/28 |
   | f939a1e4-3dc3-4540-a9f6-053e6f04918f | private_subnet | 1f7f429e-c38e-47ba-8acf-c44e3f5e8d71 | 10.0.0.0/24     |
   +--------------------------------------+----------------+--------------------------------------+-----------------+

The OpenStack command-line client can provide some additional details:

.. code-block:: console

   # openstack compute service list
   +----+------------------+------------+----------+---------+-------+----------------------------+
   | Id | Binary           | Host       | Zone     | Status  | State | Updated At                 |
   +----+------------------+------------+----------+---------+-------+----------------------------+
   | 1  | nova-consoleauth | controller | internal | enabled | up    | 2016-08-18T12:16:53.000000 |
   | 2  | nova-scheduler   | controller | internal | enabled | up    | 2016-08-18T12:16:59.000000 |
   | 3  | nova-conductor   | controller | internal | enabled | up    | 2016-08-18T12:16:52.000000 |
   | 7  | nova-compute     | controller | nova     | enabled | up    | 2016-08-18T12:16:58.000000 |
   +----+------------------+------------+----------+---------+-------+----------------------------+

The subnet listing above shows that two fixed IP networks are configured:
a public ``172.24.4.224/28`` subnet and a private ``10.0.0.0/24`` subnet.
The first network has been assigned to a certain project, while the second
network is still open for assignment. You can assign a network manually;
otherwise, it is automatically assigned when a project launches its first
instance.

To find out whether any floating IPs are available in your cloud, run:

.. code-block:: console

   # openstack floating ip list
   +--------------------------------------+---------------------+------------------+--------------------------------------+
   | ID                                   | Floating IP Address | Fixed IP Address | Port                                 |
   +--------------------------------------+---------------------+------------------+--------------------------------------+
   | 340cb36d-6a52-4091-b256-97b6e61cbb20 | 172.24.4.227        | 10.2.1.8         | 1fec8fb8-7a8c-44c2-acd8-f10e2e6cd326 |
   | 8b1bfc0c-7a91-4da0-b3cc-4acae26cbdec | 172.24.4.228        | None             | None                                 |
   +--------------------------------------+---------------------+------------------+--------------------------------------+

Here, two floating IPs are available. The first has been allocated to a
project, while the other is unallocated.

Users and Projects
~~~~~~~~~~~~~~~~~~

To see a list of projects that have been added to the cloud, run:

.. code-block:: console

   $ openstack project list
   +----------------------------------+--------------------+
   | ID                               | Name               |
   +----------------------------------+--------------------+
   | 422c17c0b26f4fbe9449f37a5621a5e6 | alt_demo           |
   | 5dc65773519248f3a580cfe28ba7fa3f | demo               |
   | 9faa845768224258808fc17a1bb27e5e | admin              |
   | a733070a420c4b509784d7ea8f6884f7 | invisible_to_admin |
   | aeb3e976e7794f3f89e4a7965db46c1e | service            |
   +----------------------------------+--------------------+

To see a list of users, run:

.. code-block:: console

   $ openstack user list
   +----------------------------------+----------+
   | ID                               | Name     |
   +----------------------------------+----------+
   | 5837063598694771aedd66aa4cddf0b8 | demo     |
   | 58efd9d852b74b87acc6efafaf31b30e | cinder   |
   | 6845d995a57a441f890abc8f55da8dfb | glance   |
   | ac2d15a1205f46d4837d5336cd4c5f5a | alt_demo |
   | d8f593c3ae2b47289221f17a776a218b | admin    |
   | d959ec0a99e24df0b7cb106ff940df20 | nova     |
   +----------------------------------+----------+

.. note::

   Sometimes a user and a group have a one-to-one mapping. This happens
   for standard system accounts, such as cinder, glance, nova, and
   swift, or when only one user is part of a group.

Running Instances
~~~~~~~~~~~~~~~~~

To see a list of running instances, run:

.. code-block:: console

   $ openstack server list --all-projects
   +--------------------------------------+------+--------+---------------------+------------+
   | ID                                   | Name | Status | Networks            | Image Name |
   +--------------------------------------+------+--------+---------------------+------------+
   | 495b4f5e-0b12-4c5a-b4e0-4326dee17a5a | vm1  | ACTIVE | public=172.24.4.232 | cirros     |
   | e83686f9-16e8-45e6-911d-48f75cb8c0fb | vm2  | ACTIVE | private=10.0.0.7    | cirros     |
   +--------------------------------------+------+--------+---------------------+------------+

Unfortunately, this command does not tell you various details about the
running instances, such as what compute node the instance is running on,
what flavor the instance is, and so on. You can use the following
command to view details about individual instances:

.. code-block:: console

   $ openstack server show <uuid>

For example:

.. code-block:: console

   # openstack server show e83686f9-16e8-45e6-911d-48f75cb8c0fb
   +--------------------------------------+----------------------------------------------------------+
   | Field                                | Value                                                    |
   +--------------------------------------+----------------------------------------------------------+
   | OS-DCF:diskConfig                    | AUTO                                                     |
   | OS-EXT-AZ:availability_zone          | nova                                                     |
   | OS-EXT-SRV-ATTR:host                 | c02.example.com                                          |
   | OS-EXT-SRV-ATTR:hypervisor_hostname  | c02.example.com                                          |
   | OS-EXT-SRV-ATTR:instance_name        | instance-00000001                                        |
   | OS-EXT-STS:power_state               | Running                                                  |
   | OS-EXT-STS:task_state                | None                                                     |
   | OS-EXT-STS:vm_state                  | active                                                   |
   | OS-SRV-USG:launched_at               | 2016-10-19T15:18:09.000000                               |
   | OS-SRV-USG:terminated_at             | None                                                     |
   | accessIPv4                           |                                                          |
   | accessIPv6                           |                                                          |
   | addresses                            | private=10.0.0.7                                         |
   | config_drive                         |                                                          |
   | created                              | 2016-10-19T15:17:46Z                                     |
   | flavor                               | m1.tiny (1)                                              |
   | hostId                               | 2b57e2b7a839508337fb55695b8f6e65aa881460a20449a76352040b |
   | id                                   | e83686f9-16e8-45e6-911d-48f75cb8c0fb                     |
   | image                                | cirros (9fef3b2d-c35d-4b61-bea8-09cc6dc41829)            |
   | key_name                             | None                                                     |
   | name                                 | vm2                                                      |
   | os-extended-volumes:volumes_attached | []                                                       |
   | progress                             | 0                                                        |
   | project_id                           | 1eaaf6ede7a24e78859591444abf314a                         |
   | properties                           |                                                          |
   | security_groups                      | [{u'name': u'default'}]                                  |
   | status                               | ACTIVE                                                   |
   | updated                              | 2016-10-19T15:18:58Z                                     |
   | user_id                              | 7aaa9b5573ce441b98dae857a82ecc68                         |
   +--------------------------------------+----------------------------------------------------------+

This output shows that an instance named ``vm2`` was created from
a ``cirros`` image using an ``m1.tiny`` flavor and is hosted on
the compute node ``c02.example.com``.

Summary
~~~~~~~

We hope you have enjoyed this quick tour of your working environment,
including how to interact with your cloud and extract useful
information. From here, you can use the `OpenStack Administrator
Guide <https://docs.openstack.org/admin-guide/>`_ as your
reference for all of the command-line functionality in your cloud.
@ -1,10 +0,0 @@
=======
Summary
=======

For stable operations, you want to detect failure promptly and determine
causes efficiently. With a distributed system, it's even more important
to track the right items to meet a service-level target. Learning where
these logs are located in the file system or API gives you an advantage.
This chapter also showed how to read, interpret, and manipulate
information from OpenStack services so that you can monitor effectively.
@ -1,15 +0,0 @@
======================
Logging and Monitoring
======================

.. toctree::
   :maxdepth: 1

   ops-logging.rst
   ops-monitoring.rst
   ops-logging-monitoring-summary.rst

As an OpenStack cloud is composed of so many different services, there
are a large number of log files. This chapter aims to assist you in
locating and working with them and describes other ways to track the
status of your deployment.
@ -1,105 +0,0 @@
=======
rsyslog
=======

A number of operating systems use rsyslog as the default logging service.
Since it is natively able to send logs to a remote location, you do not
have to install anything extra to enable this feature; you only need to
modify the configuration file. In doing this, consider running your logging
over a management network or using an encrypted VPN to avoid interception.

rsyslog client configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To begin, configure all OpenStack components to log to the syslog log
file in addition to their standard log file location. Also, configure each
component to log to a different syslog facility. This makes it easier to
split the logs into individual components on the central server:

``nova.conf``:

.. code-block:: ini

   use_syslog=True
   syslog_log_facility=LOG_LOCAL0

``glance-api.conf`` and ``glance-registry.conf``:

.. code-block:: ini

   use_syslog=True
   syslog_log_facility=LOG_LOCAL1

``cinder.conf``:

.. code-block:: ini

   use_syslog=True
   syslog_log_facility=LOG_LOCAL2

``keystone.conf``:

.. code-block:: ini

   use_syslog=True
   syslog_log_facility=LOG_LOCAL3

By default, Object Storage logs to syslog.

Next, create ``/etc/rsyslog.d/client.conf`` with the following line:

.. code-block:: none

   *.* @192.168.1.10

This instructs rsyslog to send all logs to the IP listed. In this
example, the IP points to the cloud controller.

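
After changing the configuration, restart rsyslog (and any OpenStack
service whose configuration file you edited) so the change takes effect.
The exact command depends on your distribution and init system; on many
systems it is something like:

.. code-block:: console

   # service rsyslog restart
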
rsyslog server configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Designate a server as the central logging server. The best practice is
to choose a server that is solely dedicated to this purpose. Create a
file called ``/etc/rsyslog.d/server.conf`` with the following contents:

.. code-block:: none

   # Enable UDP
   $ModLoad imudp
   # Listen on 192.168.1.10 only
   $UDPServerAddress 192.168.1.10
   # Port 514
   $UDPServerRun 514

   # Create logging templates for nova
   $template NovaFile,"/var/log/rsyslog/%HOSTNAME%/nova.log"
   $template NovaAll,"/var/log/rsyslog/nova.log"

   # Log everything else to syslog.log
   $template DynFile,"/var/log/rsyslog/%HOSTNAME%/syslog.log"
   *.* ?DynFile

   # Log various openstack components to their own individual file
   local0.* ?NovaFile
   local0.* ?NovaAll
   & ~

This example configuration handles the nova service only. It first
configures rsyslog to act as a server that runs on port 514. Next, it
creates a series of logging templates. Logging templates control where
received logs are stored. Using the last example, a nova log from
c01.example.com goes to the following locations:

- ``/var/log/rsyslog/c01.example.com/nova.log``

- ``/var/log/rsyslog/nova.log``

This is useful, as logs from c02.example.com go to:

- ``/var/log/rsyslog/c02.example.com/nova.log``

- ``/var/log/rsyslog/nova.log``

This configuration will result in a separate log file for each compute
node as well as an aggregated log file that contains nova logs from all
nodes.
@ -1,257 +0,0 @@
=======
Logging
=======

Where Are the Logs?
~~~~~~~~~~~~~~~~~~~

Most services use the convention of writing their log files to
subdirectories of the ``/var/log`` directory, as listed in
:ref:`table_log_locations`.

.. _table_log_locations:

.. list-table:: OpenStack log locations
   :widths: 25 25 50
   :header-rows: 1

   * - Node type
     - Service
     - Log location
   * - Cloud controller
     - ``nova-*``
     - ``/var/log/nova``
   * - Cloud controller
     - ``glance-*``
     - ``/var/log/glance``
   * - Cloud controller
     - ``cinder-*``
     - ``/var/log/cinder``
   * - Cloud controller
     - ``keystone-*``
     - ``/var/log/keystone``
   * - Cloud controller
     - ``neutron-*``
     - ``/var/log/neutron``
   * - Cloud controller
     - horizon
     - ``/var/log/apache2/``
   * - All nodes
     - misc (swift, dnsmasq)
     - ``/var/log/syslog``
   * - Compute nodes
     - libvirt
     - ``/var/log/libvirt/libvirtd.log``
   * - Compute nodes
     - Console (boot up messages) for VM instances:
     - ``/var/lib/nova/instances/instance-<instance id>/console.log``
   * - Block Storage nodes
     - cinder-volume
     - ``/var/log/cinder/cinder-volume.log``

Reading the Logs
~~~~~~~~~~~~~~~~

OpenStack services use the standard logging levels, at increasing
severity: TRACE, DEBUG, INFO, AUDIT, WARNING, ERROR, and CRITICAL. That
is, messages only appear in the logs if they are more "severe" than the
particular log level, with DEBUG allowing all log statements through.
For example, TRACE is logged only if the software has a stack trace,
while INFO is logged for every message including those that are only for
information.

To disable DEBUG-level logging, edit the ``/etc/nova/nova.conf`` file as
follows:

.. code-block:: ini

   debug=false

Keystone is handled a little differently. To modify the logging level,
edit the ``/etc/keystone/logging.conf`` file and look at the
``logger_root`` and ``handler_file`` sections.

Logging for horizon is configured in
``/etc/openstack_dashboard/local_settings.py``. Because horizon is
a Django web application, it follows the `Django Logging framework
conventions <https://docs.djangoproject.com/en/dev/topics/logging/>`_.

The first step in finding the source of an error is typically to search
for a CRITICAL or ERROR message in the log, starting at the
bottom of the log file.

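
A quick way to do this from the command line, assuming the nova-api log
location from the table above, is to pull out the most recent ERROR or
CRITICAL entries:

.. code-block:: console

   # grep -E "CRITICAL|ERROR" /var/log/nova/nova-api.log | tail -5
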
Here is an example of a log message with the corresponding
ERROR (Python traceback) immediately following:

.. code-block:: console

   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server [req-c0b38ace-2586-48ce-9336-6233efa1f035 6c9808c2c5044e1388a83a74da9364d5 e07f5395c2eb428cafc41679e7deeab1 - default default] Exception during message handling
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server Traceback (most recent call last):
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/oslo_messaging/rpc/server.py", line 133, in _process_incoming
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     res = self.dispatcher.dispatch(message)
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/oslo_messaging/rpc/dispatcher.py", line 150, in dispatch
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     return self._do_dispatch(endpoint, method, ctxt, args)
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/oslo_messaging/rpc/dispatcher.py", line 121, in _do_dispatch
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     result = func(ctxt, **new_args)
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/cinder/volume/manager.py", line 4366, in create_volume
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     allow_reschedule=allow_reschedule, volume=volume)
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/cinder/volume/manager.py", line 634, in create_volume
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     _run_flow()
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/cinder/volume/manager.py", line 626, in _run_flow
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     flow_engine.run()
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/taskflow/engines/action_engine/engine.py", line 247, in run
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     for _state in self.run_iter(timeout=timeout):
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/taskflow/engines/action_engine/engine.py", line 340, in run_iter
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     failure.Failure.reraise_if_any(er_failures)
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/taskflow/types/failure.py", line 336, in reraise_if_any
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server     failures[0].reraise()
   2017-01-18 15:54:00.467 32552 ERROR oslo_messaging.rpc.server   File "/openstack/venvs/cinder-14.0.0/lib/python2.7/site-packages/taskflow/types/failure.py", line 343, in reraise

In this example, ``cinder-volume`` failed to start and has provided a
stack trace, since its volume back end has been unable to set up the
storage volume—probably because the LVM volume that is expected from the
configuration does not exist.

Here is an example error log:

.. code-block:: console

   2013-02-25 20:26:33 6619 ERROR nova.openstack.common.rpc.common [-] AMQP server on localhost:5672 is unreachable:
   [Errno 111] ECONNREFUSED. Trying again in 23 seconds.

In this error, a nova service has failed to connect to the RabbitMQ
server because it got a connection refused error.

Tracing Instance Requests
~~~~~~~~~~~~~~~~~~~~~~~~~

When an instance fails to behave properly, you will often have to trace
activity associated with that instance across the log files of various
``nova-*`` services and across both the cloud controller and compute
nodes.

The typical way is to trace the UUID associated with an instance across
the service logs.

Consider the following example:

.. code-block:: console

   $ openstack server list
   +--------------------------------------+--------+--------+---------------------------+------------+
   | ID                                   | Name   | Status | Networks                  | Image Name |
   +--------------------------------------+--------+--------+---------------------------+------------+
   | faf7ded8-4a46-413b-b113-f19590746ffe | cirros | ACTIVE | novanetwork=192.168.100.3 | cirros     |
   +--------------------------------------+--------+--------+---------------------------+------------+

Here, the ID associated with the instance is
``faf7ded8-4a46-413b-b113-f19590746ffe``. If you search for this string
on the cloud controller in the ``/var/log/nova-*.log`` files, it appears
in ``nova-api.log`` and ``nova-scheduler.log``. If you search for this
on the compute nodes in ``/var/log/nova-*.log``, it appears in
``nova-compute.log``. If no ERROR or CRITICAL messages appear, the most
recent log entry that reports this may provide a hint about what has gone
wrong.

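
For example, on the cloud controller a search of the nova logs for that
UUID might look like this (adjust the path if your nova logs live under
``/var/log/nova/``, as in the log location table earlier):

.. code-block:: console

   # grep faf7ded8-4a46-413b-b113-f19590746ffe /var/log/nova/*.log
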
Adding Custom Logging Statements
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If there is not enough information in the existing logs, you may need to
add your own custom logging statements to the ``nova-*``
services.

The source files are located in
``/usr/lib/python2.7/dist-packages/nova``.

To add logging statements, the following line should be near the top of
the file. For most files, these should already be there:

.. code-block:: python

   from nova.openstack.common import log as logging
   LOG = logging.getLogger(__name__)

To add a DEBUG logging statement, you would do:

.. code-block:: python

   LOG.debug("This is a custom debugging statement")

You may notice that all the existing logging messages are preceded by an
underscore and surrounded by parentheses, for example:

.. code-block:: python

   LOG.debug(_("Logging statement appears here"))

This formatting is used to support translation of logging messages into
different languages using the
`gettext <https://docs.python.org/2/library/gettext.html>`_
internationalization library. You don't need to do this for your own
custom log messages. However, if you want to contribute the code back to
the OpenStack project that includes logging statements, you must
surround your log messages with underscores and parentheses.

RabbitMQ Web Management Interface or rabbitmqctl
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Aside from connection failures, RabbitMQ log files are generally not
useful for debugging OpenStack related issues. Instead, we recommend you
use the RabbitMQ web management interface. Enable it on your cloud
controller:

.. code-block:: console

   # /usr/lib/rabbitmq/bin/rabbitmq-plugins enable rabbitmq_management

.. code-block:: console

   # service rabbitmq-server restart

The RabbitMQ web management interface is accessible on your cloud
controller at *http://localhost:55672*.

.. note::

   Ubuntu 12.04 installs RabbitMQ version 2.7.1, which uses port 55672.
   RabbitMQ versions 3.0 and above use port 15672 instead. You can
   check which version of RabbitMQ you have running on your local
   Ubuntu machine by doing:

   .. code-block:: console

      $ dpkg -s rabbitmq-server | grep "Version:"
      Version: 2.7.1-0ubuntu4

An alternative to enabling the RabbitMQ web management interface is to
use the ``rabbitmqctl`` commands. For example,
:command:`rabbitmqctl list_queues | grep cinder` displays any messages left in
the queue. If there are messages, it's a possible sign that cinder
services didn't connect properly to RabbitMQ and might have to be
restarted.

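
Run on the cloud controller, the check looks like this:

.. code-block:: console

   # rabbitmqctl list_queues | grep cinder
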
Items to monitor for RabbitMQ include the number of items in each of the
queues and the processing time statistics for the server.

Centrally Managing Logs
~~~~~~~~~~~~~~~~~~~~~~~

Because your cloud is most likely composed of many servers, you must
check logs on each of those servers to properly piece an event together.
A better solution is to send the logs of all servers to a central
location so that they can all be accessed from the same
area.

The choice of central logging engine will be dependent on the operating
system in use as well as any organizational requirements for logging tools.

Syslog choices
--------------

There are a large number of syslog engines available, each with differing
capabilities and configuration requirements.

.. toctree::
   :maxdepth: 1

   ops-logging-rsyslog.rst
@ -1,50 +0,0 @@
|
|||||||
===========================
|
|
||||||
Handling a Complete Failure
|
|
||||||
===========================
|
|
||||||
|
|
||||||
A common way of dealing with the recovery from a full system failure,
|
|
||||||
such as a power outage of a data center, is to assign each service a
priority and restore them in order.
|
|
||||||
:ref:`table_example_priority` shows an example.
|
|
||||||
|
|
||||||
.. _table_example_priority:
|
|
||||||
|
|
||||||
.. list-table:: Table. Example service restoration priority list
|
|
||||||
:header-rows: 1
|
|
||||||
|
|
||||||
* - Priority
|
|
||||||
- Services
|
|
||||||
* - 1
|
|
||||||
- Internal network connectivity
|
|
||||||
* - 2
|
|
||||||
- Backing storage services
|
|
||||||
* - 3
|
|
||||||
- Public network connectivity for user virtual machines
|
|
||||||
* - 4
|
|
||||||
- ``nova-compute``, cinder hosts
|
|
||||||
* - 5
|
|
||||||
- User virtual machines
|
|
||||||
* - 10
|
|
||||||
- Message queue and database services
|
|
||||||
* - 15
|
|
||||||
- Keystone services
|
|
||||||
* - 20
|
|
||||||
- ``cinder-scheduler``
|
|
||||||
* - 21
|
|
||||||
- Image Catalog and Delivery services
|
|
||||||
* - 22
|
|
||||||
- ``nova-scheduler`` services
|
|
||||||
* - 98
|
|
||||||
- ``cinder-api``
|
|
||||||
* - 99
|
|
||||||
- ``nova-api`` services
|
|
||||||
* - 100
|
|
||||||
- Dashboard node
|
|
||||||
|
|
||||||
Use this example priority list to ensure that user-affected services are
|
|
||||||
restored as soon as possible, but not before a stable environment is in
|
|
||||||
place. Of course, despite being listed as a single-line item, each step
|
|
||||||
requires significant work. For example, just after starting the
|
|
||||||
database, you should check its integrity, or, after starting the nova
|
|
||||||
services, you should verify that the hypervisor matches the database and
|
|
||||||
fix any mismatches.
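Neither check is prescribed here; as a rough sketch, assuming a MySQL back end
and the example host names used in this guide, the verification might look
like this:

.. code-block:: console

   # mysqlcheck --all-databases --check -u root -p
   # openstack hypervisor list
   # openstack server list --all-projects --host c01.example.com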
|
|
@ -1,638 +0,0 @@
|
|||||||
=====================================
|
|
||||||
Compute Node Failures and Maintenance
|
|
||||||
=====================================
|
|
||||||
|
|
||||||
Sometimes a compute node either crashes unexpectedly or requires a
|
|
||||||
reboot for maintenance reasons.
|
|
||||||
|
|
||||||
Planned Maintenance
|
|
||||||
~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If you need to reboot a compute node due to planned maintenance, such as
|
|
||||||
a software or hardware upgrade, perform the following steps:
|
|
||||||
|
|
||||||
#. Disable scheduling of new VMs to the node, optionally providing a reason
|
|
||||||
comment:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack compute service set --disable --disable-reason \
|
|
||||||
maintenance c01.example.com nova-compute
|
|
||||||
|
|
||||||
#. Verify that all hosted instances have been moved off the node:
|
|
||||||
|
|
||||||
* If your cloud is using a shared storage:
|
|
||||||
|
|
||||||
#. Get a list of instances that need to be moved:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server list --host c01.example.com --all-projects
|
|
||||||
|
|
||||||
#. Migrate all instances one by one (a scripted loop for this step is
   sketched after this procedure):
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server migrate <uuid> --live c02.example.com
|
|
||||||
|
|
||||||
* If your cloud is not using a shared storage, run:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server migrate <uuid> --live --block-migration c02.example.com
|
|
||||||
|
|
||||||
#. Stop the ``nova-compute`` service:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# stop nova-compute
|
|
||||||
|
|
||||||
If you use a configuration-management system, such as Puppet, that
|
|
||||||
ensures the ``nova-compute`` service is always running, you can
|
|
||||||
temporarily move the ``init`` files:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mkdir /root/tmp
|
|
||||||
# mv /etc/init/nova-compute.conf /root/tmp
|
|
||||||
# mv /etc/init.d/nova-compute /root/tmp
|
|
||||||
|
|
||||||
#. Shut down your compute node, perform the maintenance, and turn
|
|
||||||
the node back on.
|
|
||||||
|
|
||||||
#. Start the ``nova-compute`` service:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# start nova-compute
|
|
||||||
|
|
||||||
You can re-enable the ``nova-compute`` service by undoing the commands:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mv /root/tmp/nova-compute.conf /etc/init
|
|
||||||
# mv /root/tmp/nova-compute /etc/init.d/
|
|
||||||
|
|
||||||
#. Enable scheduling of VMs to the node:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack compute service set --enable c01.example.com nova-compute
|
|
||||||
|
|
||||||
#. Optionally, migrate the instances back to their original compute node.
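If many instances need to be evacuated, the live-migration step above can be
wrapped in a small shell loop. This is only a sketch; it assumes shared
storage and reuses the example host names from this procedure:

.. code-block:: console

   # for uuid in $(openstack server list --host c01.example.com \
     --all-projects -f value -c ID); do \
     openstack server migrate $uuid --live c02.example.com; done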
|
|
||||||
|
|
||||||
After a Compute Node Reboots
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
When you reboot a compute node, first verify that it booted
|
|
||||||
successfully. This includes ensuring that the ``nova-compute`` service
|
|
||||||
is running:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# ps aux | grep nova-compute
|
|
||||||
# status nova-compute
|
|
||||||
|
|
||||||
Also ensure that it has successfully connected to the AMQP server:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# grep AMQP /var/log/nova/nova-compute.log
|
|
||||||
2013-02-26 09:51:31 12427 INFO nova.openstack.common.rpc.common [-] Connected to AMQP server on 199.116.232.36:5672
|
|
||||||
|
|
||||||
After the compute node is successfully running, you must deal with the
|
|
||||||
instances that are hosted on that compute node because none of them are
|
|
||||||
running. Depending on your SLA with your users or customers, you might
|
|
||||||
have to start each instance and ensure that they start correctly.
|
|
||||||
|
|
||||||
Instances
|
|
||||||
~~~~~~~~~
|
|
||||||
|
|
||||||
You can create a list of instances that are hosted on the compute node
|
|
||||||
by performing the following command:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server list --host c01.example.com --all-projects
|
|
||||||
|
|
||||||
After you have the list, you can use the :command:`openstack` command to
|
|
||||||
start each instance:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server reboot <server>
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Any time an instance shuts down unexpectedly, it might have problems
|
|
||||||
on boot. For example, the instance might require an ``fsck`` on the
|
|
||||||
root partition. If this happens, the user can use the dashboard VNC
|
|
||||||
console to fix this.
|
|
||||||
|
|
||||||
If an instance does not boot, meaning ``virsh list`` never shows the
|
|
||||||
instance as even attempting to boot, do the following on the compute
|
|
||||||
node:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# tail -f /var/log/nova/nova-compute.log
|
|
||||||
|
|
||||||
Try executing the :command:`openstack server reboot` command again. You should
|
|
||||||
see an error message about why the instance was not able to boot.
|
|
||||||
|
|
||||||
In most cases, the error is the result of something in libvirt's XML
|
|
||||||
file (``/etc/libvirt/qemu/instance-xxxxxxxx.xml``) that no longer
|
|
||||||
exists. You can enforce re-creation of the XML file as well as rebooting
|
|
||||||
the instance by running the following command:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server reboot --hard <server>
|
|
||||||
|
|
||||||
Inspecting and Recovering Data from Failed Instances
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
In some scenarios, instances are running but are inaccessible through
|
|
||||||
SSH and do not respond to any command. The VNC console could be
|
|
||||||
displaying a boot failure or kernel panic error message. This could be
|
|
||||||
an indication of file system corruption on the VM itself. If you need to
|
|
||||||
recover files or inspect the content of the instance, qemu-nbd can be
|
|
||||||
used to mount the disk.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
If you access or view the user's content and data, get approval first!
|
|
||||||
|
|
||||||
To access the instance's disk
|
|
||||||
(``/var/lib/nova/instances/instance-xxxxxx/disk``), use the following
|
|
||||||
steps:
|
|
||||||
|
|
||||||
#. Suspend the instance using the ``virsh`` command.
|
|
||||||
|
|
||||||
#. Connect the qemu-nbd device to the disk.
|
|
||||||
|
|
||||||
#. Mount the qemu-nbd device.
|
|
||||||
|
|
||||||
#. Unmount the device after inspecting.
|
|
||||||
|
|
||||||
#. Disconnect the qemu-nbd device.
|
|
||||||
|
|
||||||
#. Resume the instance.
|
|
||||||
|
|
||||||
If you do not follow the last three steps, OpenStack Compute cannot manage
|
|
||||||
the instance any longer. It fails to respond to any command issued by
|
|
||||||
OpenStack Compute, and it is marked as shut down.
|
|
||||||
|
|
||||||
Once you mount the disk file, you should be able to access it and treat
|
|
||||||
it as a collection of normal directories with files and a directory
|
|
||||||
structure. However, we do not recommend that you edit or touch any files
|
|
||||||
because this could change the
|
|
||||||
:term:`access control lists (ACLs) <access control list (ACL)>` that are used
|
|
||||||
to determine which accounts can perform what operations on files and
|
|
||||||
directories. Changing ACLs can make the instance unbootable if it is not
|
|
||||||
already.
|
|
||||||
|
|
||||||
#. Suspend the instance using the :command:`virsh` command, taking note of the
|
|
||||||
internal ID:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# virsh list
|
|
||||||
Id Name State
|
|
||||||
----------------------------------
|
|
||||||
1 instance-00000981 running
|
|
||||||
2 instance-000009f5 running
|
|
||||||
30 instance-0000274a running
|
|
||||||
|
|
||||||
# virsh suspend 30
|
|
||||||
Domain 30 suspended
|
|
||||||
|
|
||||||
#. Find the ID for each instance by listing the server IDs using the
|
|
||||||
following command:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server list
|
|
||||||
+--------------------------------------+-------+---------+-----------------------------+------------+
|
|
||||||
| ID | Name | Status | Networks | Image Name |
|
|
||||||
+--------------------------------------+-------+---------+-----------------------------+------------+
|
|
||||||
| 2da14c5c-de6d-407d-a7d2-2dd0862b9967 | try3 | ACTIVE | finance-internal=10.10.0.4 | |
|
|
||||||
| 223f4860-722a-44a0-bac7-f73f58beec7b | try2 | ACTIVE | finance-internal=10.10.0.13 | |
|
|
||||||
+--------------------------------------+-------+---------+-----------------------------+------------+
|
|
||||||
|
|
||||||
#. Connect the qemu-nbd device to the disk:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# cd /var/lib/nova/instances/instance-0000274a
|
|
||||||
# ls -lh
|
|
||||||
total 33M
|
|
||||||
-rw-rw---- 1 libvirt-qemu kvm 6.3K Oct 15 11:31 console.log
|
|
||||||
-rw-r--r-- 1 libvirt-qemu kvm 33M Oct 15 22:06 disk
|
|
||||||
-rw-r--r-- 1 libvirt-qemu kvm 384K Oct 15 22:06 disk.local
|
|
||||||
-rw-rw-r-- 1 nova nova 1.7K Oct 15 11:30 libvirt.xml
|
|
||||||
# qemu-nbd -c /dev/nbd0 `pwd`/disk
|
|
||||||
|
|
||||||
#. Mount the qemu-nbd device.
|
|
||||||
|
|
||||||
The qemu-nbd device tries to export the instance disk's different
|
|
||||||
partitions as separate devices. For example, if vda is the disk and
|
|
||||||
vda1 is the root partition, qemu-nbd exports the device as
|
|
||||||
``/dev/nbd0`` and ``/dev/nbd0p1``, respectively:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mount /dev/nbd0p1 /mnt/
|
|
||||||
|
|
||||||
You can now access the contents of ``/mnt``, which correspond to the
|
|
||||||
first partition of the instance's disk.
|
|
||||||
|
|
||||||
To examine the secondary or ephemeral disk, use an alternate mount
|
|
||||||
point if you want both primary and secondary drives mounted at the
|
|
||||||
same time:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# umount /mnt
|
|
||||||
# qemu-nbd -c /dev/nbd1 `pwd`/disk.local
|
|
||||||
# mount /dev/nbd1 /mnt/
|
|
||||||
# ls -lh /mnt/
|
|
||||||
total 76K
|
|
||||||
lrwxrwxrwx. 1 root root 7 Oct 15 00:44 bin -> usr/bin
|
|
||||||
dr-xr-xr-x. 4 root root 4.0K Oct 15 01:07 boot
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Oct 15 00:42 dev
|
|
||||||
drwxr-xr-x. 70 root root 4.0K Oct 15 11:31 etc
|
|
||||||
drwxr-xr-x. 3 root root 4.0K Oct 15 01:07 home
|
|
||||||
lrwxrwxrwx. 1 root root 7 Oct 15 00:44 lib -> usr/lib
|
|
||||||
lrwxrwxrwx. 1 root root 9 Oct 15 00:44 lib64 -> usr/lib64
|
|
||||||
drwx------. 2 root root 16K Oct 15 00:42 lost+found
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Feb 3 2012 media
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Feb 3 2012 mnt
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Feb 3 2012 opt
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Oct 15 00:42 proc
|
|
||||||
dr-xr-x---. 3 root root 4.0K Oct 15 21:56 root
|
|
||||||
drwxr-xr-x. 14 root root 4.0K Oct 15 01:07 run
|
|
||||||
lrwxrwxrwx. 1 root root 8 Oct 15 00:44 sbin -> usr/sbin
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Feb 3 2012 srv
|
|
||||||
drwxr-xr-x. 2 root root 4.0K Oct 15 00:42 sys
|
|
||||||
drwxrwxrwt. 9 root root 4.0K Oct 15 16:29 tmp
|
|
||||||
drwxr-xr-x. 13 root root 4.0K Oct 15 00:44 usr
|
|
||||||
drwxr-xr-x. 17 root root 4.0K Oct 15 00:44 var
|
|
||||||
|
|
||||||
#. Once you have completed the inspection, unmount the mount point and
|
|
||||||
release the qemu-nbd device:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# umount /mnt
|
|
||||||
# qemu-nbd -d /dev/nbd0
|
|
||||||
/dev/nbd0 disconnected
|
|
||||||
|
|
||||||
#. Resume the instance using :command:`virsh`:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# virsh list
|
|
||||||
Id Name State
|
|
||||||
----------------------------------
|
|
||||||
1 instance-00000981 running
|
|
||||||
2 instance-000009f5 running
|
|
||||||
30 instance-0000274a paused
|
|
||||||
|
|
||||||
# virsh resume 30
|
|
||||||
Domain 30 resumed
|
|
||||||
|
|
||||||
Managing floating IP addresses between instances
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
In an elastic cloud environment using the ``Public_AGILE`` network, each
instance has publicly accessible IPv4 and IPv6 addresses. The network does not
support the concept of OpenStack floating IP addresses that can easily be
attached, removed, and transferred between instances. However, there is a
workaround that uses neutron ports containing the IPv4 and IPv6 addresses.
|
|
||||||
|
|
||||||
**Create a port that can be reused**
|
|
||||||
|
|
||||||
#. Create a port on the ``Public_AGILE`` network:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack port create port1 --network Public_AGILE
|
|
||||||
|
|
||||||
Created a new port:
|
|
||||||
+-----------------------+------------------------------------------------------+
|
|
||||||
| Field | Value |
|
|
||||||
+-----------------------+------------------------------------------------------+
|
|
||||||
| admin_state_up | UP |
|
|
||||||
| allowed_address_pairs | |
|
|
||||||
| binding_host_id | None |
|
|
||||||
| binding_profile | None |
|
|
||||||
| binding_vif_details | None |
|
|
||||||
| binding_vif_type | None |
|
|
||||||
| binding_vnic_type | normal |
|
|
||||||
| created_at | 2017-02-26T14:23:18Z |
|
|
||||||
| description | |
|
|
||||||
| device_id | |
|
|
||||||
| device_owner | |
|
|
||||||
| dns_assignment | None |
|
|
||||||
| dns_name | None |
|
|
||||||
| extra_dhcp_opts | |
|
|
||||||
| fixed_ips | ip_address='96.118.182.106', |
|
|
||||||
| | subnet_id='4279c70a-7218-4c7e-94e5-7bd4c045644e' |
|
|
||||||
| | ip_address='2001:558:fc0b:100:f816:3eff:fefb:45fb', |
|
|
||||||
| | subnet_id='11d8087b-6288-4129-95ff-42c3df0c1df0' |
|
|
||||||
| id | 3871bf29-e963-4701-a7dd-8888dbaab375 |
|
|
||||||
| ip_address | None |
|
|
||||||
| mac_address | fa:16:3e:e2:09:e0 |
|
|
||||||
| name | port1 |
|
|
||||||
| network_id | f41bd921-3a59-49c4-aa95-c2e4496a4b56 |
|
|
||||||
| option_name | None |
|
|
||||||
| option_value | None |
|
|
||||||
| port_security_enabled | True |
|
|
||||||
| project_id | 52f0574689f14c8a99e7ca22c4eb572 |
|
|
||||||
| qos_policy_id | None |
|
|
||||||
| revision_number | 6 |
|
|
||||||
| security_groups | 20d96891-0055-428a-8fa6-d5aed25f0dc6 |
|
|
||||||
| status | DOWN |
|
|
||||||
| subnet_id | None |
|
|
||||||
| updated_at | 2017-02-26T14:23:19Z |
|
|
||||||
+-----------------------+------------------------------------------------------+
|
|
||||||
|
|
||||||
#. If you know the fully qualified domain name (FQDN) that will be assigned to
   the IP address, create the port with that FQDN as its name:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack port create "example-fqdn-01.sys.example.com" --network Public_AGILE
|
|
||||||
|
|
||||||
Created a new port:
|
|
||||||
+-----------------------+------------------------------------------------------+
|
|
||||||
| Field | Value |
|
|
||||||
+-----------------------+------------------------------------------------------+
|
|
||||||
| admin_state_up | UP |
|
|
||||||
| allowed_address_pairs | |
|
|
||||||
| binding_host_id | None |
|
|
||||||
| binding_profile | None |
|
|
||||||
| binding_vif_details | None |
|
|
||||||
| binding_vif_type | None |
|
|
||||||
| binding_vnic_type | normal |
|
|
||||||
| created_at | 2017-02-26T14:24:16Z |
|
|
||||||
| description | |
|
|
||||||
| device_id | |
|
|
||||||
| device_owner | |
|
|
||||||
| dns_assignment | None |
|
|
||||||
| dns_name | None |
|
|
||||||
| extra_dhcp_opts | |
|
|
||||||
| fixed_ips | ip_address='96.118.182.107', |
|
|
||||||
| | subnet_id='4279c70a-7218-4c7e-94e5-7bd4c045644e' |
|
|
||||||
| | ip_address='2001:558:fc0b:100:f816:3eff:fefb:65fc', |
|
|
||||||
| | subnet_id='11d8087b-6288-4129-95ff-42c3df0c1df0' |
|
|
||||||
| id | 731c3b28-3753-4e63-bae3-b58a52d6ccca |
|
|
||||||
| ip_address | None |
|
|
||||||
| mac_address | fa:16:3e:fb:65:fc |
|
|
||||||
| name | example-fqdn-01.sys.example.com |
|
|
||||||
| network_id | f41bd921-3a59-49c4-aa95-c2e4496a4b56 |
|
|
||||||
| option_name | None |
|
|
||||||
| option_value | None |
|
|
||||||
| port_security_enabled | True |
|
|
||||||
| project_id | 52f0574689f14c8a99e7ca22c4eb5720 |
|
|
||||||
| qos_policy_id | None |
|
|
||||||
| revision_number | 6 |
|
|
||||||
| security_groups | 20d96891-0055-428a-8fa6-d5aed25f0dc6 |
|
|
||||||
| status | DOWN |
|
|
||||||
| subnet_id | None |
|
|
||||||
| updated_at | 2017-02-26T14:24:17Z |
|
|
||||||
+-----------------------+------------------------------------------------------+
|
|
||||||
|
|
||||||
#. Use the port when creating an instance:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack server create --flavor m1.medium --image ubuntu.qcow2 \
|
|
||||||
--key-name team_key --nic port-id=PORT_ID \
|
|
||||||
"example-fqdn-01.sys.example.com"
|
|
||||||
|
|
||||||
#. Verify the instance has the correct IP address:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
+--------------------------------------+----------------------------------------------------------+
|
|
||||||
| Field | Value |
|
|
||||||
+--------------------------------------+----------------------------------------------------------+
|
|
||||||
| OS-DCF:diskConfig | MANUAL |
|
|
||||||
| OS-EXT-AZ:availability_zone | nova |
|
|
||||||
| OS-EXT-SRV-ATTR:host | os_compute-1 |
|
|
||||||
| OS-EXT-SRV-ATTR:hypervisor_hostname | os_compute.ece.example.com |
|
|
||||||
| OS-EXT-SRV-ATTR:instance_name | instance-00012b82 |
|
|
||||||
| OS-EXT-STS:power_state | Running |
|
|
||||||
| OS-EXT-STS:task_state | None |
|
|
||||||
| OS-EXT-STS:vm_state | active |
|
|
||||||
| OS-SRV-USG:launched_at | 2016-11-30T08:55:27.000000 |
|
|
||||||
| OS-SRV-USG:terminated_at | None |
|
|
||||||
| accessIPv4 | |
|
|
||||||
| accessIPv6 | |
|
|
||||||
| addresses | public=172.24.4.236 |
|
|
||||||
| config_drive | |
|
|
||||||
| created | 2016-11-30T08:55:14Z |
|
|
||||||
| flavor | m1.medium (103) |
|
|
||||||
| hostId | aca973d5b7981faaf8c713a0130713bbc1e64151be65c8dfb53039f7 |
|
|
||||||
| id | f91bd761-6407-46a6-b5fd-11a8a46e4983 |
|
|
||||||
| image | Example Cloud Ubuntu 14.04 x86_64 v2.5 (fb49d7e1-273b-...|
|
|
||||||
| key_name | team_key |
|
|
||||||
| name | example-fqdn-01.sys.example.com |
|
|
||||||
| os-extended-volumes:volumes_attached | [] |
|
|
||||||
| progress | 0 |
|
|
||||||
| project_id | 2daf82a578e9437cab396c888ff0ca57 |
|
|
||||||
| properties | |
|
|
||||||
| security_groups | [{u'name': u'default'}] |
|
|
||||||
| status | ACTIVE |
|
|
||||||
| updated | 2016-11-30T08:55:27Z |
|
|
||||||
| user_id | 8cbea24666ae49bbb8c1641f9b12d2d2 |
|
|
||||||
+--------------------------------------+----------------------------------------------------------+
|
|
||||||
|
|
||||||
#. Check the port connection using the netcat utility:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nc -v -w 2 96.118.182.107 22
|
|
||||||
Ncat: Version 7.00 ( https://nmap.org/ncat )
|
|
||||||
Ncat: Connected to 96.118.182.107:22.
|
|
||||||
SSH-2.0-OpenSSH_6.6.1p1 Ubuntu-2ubuntu2.6
|
|
||||||
|
|
||||||
**Detach a port from an instance**
|
|
||||||
|
|
||||||
#. Find the port corresponding to the instance. For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack port list | grep -B1 96.118.182.107
|
|
||||||
|
|
||||||
| 731c3b28-3753-4e63-bae3-b58a52d6ccca | example-fqdn-01.sys.example.com | fa:16:3e:fb:65:fc | ip_address='96.118.182.107', subnet_id='4279c70a-7218-4c7e-94e5-7bd4c045644e' |
|
|
||||||
|
|
||||||
#. Run the :command:`openstack port set` command to remove the port from
|
|
||||||
the instance:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack port set 731c3b28-3753-4e63-bae3-b58a52d6ccca \
|
|
||||||
--device "" --device-owner "" --no-binding-profile
|
|
||||||
|
|
||||||
#. Delete the instance and create a new instance using the
|
|
||||||
``--nic port-id`` option.
|
|
||||||
|
|
||||||
**Retrieve an IP address when an instance is deleted before detaching a port**
|
|
||||||
|
|
||||||
The following procedure is a possible workaround to retrieve an IP address
|
|
||||||
when an instance has been deleted with the port still attached:
|
|
||||||
|
|
||||||
#. Launch several neutron ports:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ for i in {0..10}; do openstack port create --network Public_AGILE \
|
|
||||||
ip-recovery; done
|
|
||||||
|
|
||||||
#. Check the ports for the lost IP address and update the name:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack port set 731c3b28-3753-4e63-bae3-b58a52d6ccca \
|
|
||||||
--name "don't delete"
|
|
||||||
|
|
||||||
#. Delete the ports that are not needed:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ for port in $(openstack port list | grep -i ip-recovery | \
|
|
||||||
awk '{print $2}'); do openstack port delete $port; done
|
|
||||||
|
|
||||||
#. If you still cannot find the lost IP address, repeat these steps.
|
|
||||||
|
|
||||||
.. _volumes:
|
|
||||||
|
|
||||||
Volumes
|
|
||||||
~~~~~~~
|
|
||||||
|
|
||||||
If the affected instances also had attached volumes, first generate a
|
|
||||||
list of instance and volume UUIDs:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
mysql> select nova.instances.uuid as instance_uuid,
|
|
||||||
cinder.volumes.id as volume_uuid, cinder.volumes.status,
|
|
||||||
cinder.volumes.attach_status, cinder.volumes.mountpoint,
|
|
||||||
cinder.volumes.display_name from cinder.volumes
|
|
||||||
inner join nova.instances on cinder.volumes.instance_uuid=nova.instances.uuid
|
|
||||||
where nova.instances.host = 'c01.example.com';
|
|
||||||
|
|
||||||
You should see a result similar to the following:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
+--------------+------------+-------+--------------+-----------+--------------+
|
|
||||||
|instance_uuid |volume_uuid |status |attach_status |mountpoint | display_name |
|
|
||||||
+--------------+------------+-------+--------------+-----------+--------------+
|
|
||||||
|9b969a05 |1f0fbf36 |in-use |attached |/dev/vdc | test |
|
|
||||||
+--------------+------------+-------+--------------+-----------+--------------+
|
|
||||||
1 row in set (0.00 sec)
|
|
||||||
|
|
||||||
Next, manually detach and reattach the volumes, where X is the proper
|
|
||||||
mount point:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server remove volume <instance_uuid> <volume_uuid>
|
|
||||||
# openstack server add volume <instance_uuid> <volume_uuid> --device /dev/vdX
|
|
||||||
|
|
||||||
Be sure that the instance has successfully booted and is at a login
|
|
||||||
screen before doing the above.
|
|
||||||
|
|
||||||
Total Compute Node Failure
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Compute nodes can fail the same way a cloud controller can fail. A
|
|
||||||
motherboard failure or some other type of hardware failure can cause an
|
|
||||||
entire compute node to go offline. When this happens, all instances
|
|
||||||
running on that compute node will not be available. Just like with a
|
|
||||||
cloud controller failure, if your infrastructure monitoring does not
|
|
||||||
detect a failed compute node, your users will notify you because of
|
|
||||||
their lost instances.
|
|
||||||
|
|
||||||
If a compute node fails and won't be fixed for a few hours (or at all),
|
|
||||||
you can relaunch all instances that are hosted on the failed node if you
|
|
||||||
use shared storage for ``/var/lib/nova/instances``.
|
|
||||||
|
|
||||||
To do this, generate a list of instance UUIDs that are hosted on the
|
|
||||||
failed node by running the following query on the nova database:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
mysql> select uuid from instances
|
|
||||||
where host = 'c01.example.com' and deleted = 0;
|
|
||||||
|
|
||||||
Next, update the nova database to indicate that all instances that used
|
|
||||||
to be hosted on c01.example.com are now hosted on c02.example.com:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
mysql> update instances set host = 'c02.example.com'
|
|
||||||
where host = 'c01.example.com' and deleted = 0;
|
|
||||||
|
|
||||||
If you're using the Networking service ML2 plug-in, update the
|
|
||||||
Networking service database to indicate that all ports that used to be
|
|
||||||
hosted on c01.example.com are now hosted on c02.example.com:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
mysql> update ml2_port_bindings set host = 'c02.example.com'
|
|
||||||
where host = 'c01.example.com';
|
|
||||||
mysql> update ml2_port_binding_levels set host = 'c02.example.com'
|
|
||||||
where host = 'c01.example.com';
|
|
||||||
|
|
||||||
After that, use the :command:`openstack` command to reboot all instances
|
|
||||||
that were on c01.example.com while regenerating their XML files at the same
|
|
||||||
time:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server reboot --hard <server>
|
|
||||||
|
|
||||||
Finally, reattach volumes using the same method described in the section
|
|
||||||
:ref:`volumes`.
|
|
||||||
|
|
||||||
/var/lib/nova/instances
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
It's worth mentioning this directory in the context of failed compute
|
|
||||||
nodes. This directory contains the libvirt KVM file-based disk images
|
|
||||||
for the instances that are hosted on that compute node. If you are not
|
|
||||||
running your cloud in a shared storage environment, this directory is
|
|
||||||
unique across all compute nodes.
|
|
||||||
|
|
||||||
``/var/lib/nova/instances`` contains two types of directories.
|
|
||||||
|
|
||||||
The first is the ``_base`` directory. This contains all the cached base
|
|
||||||
images from glance for each unique image that has been launched on that
|
|
||||||
compute node. Files ending in ``_20`` (or a different number) are the
|
|
||||||
ephemeral base images.
|
|
||||||
|
|
||||||
The other directories are titled ``instance-xxxxxxxx``. These
|
|
||||||
directories correspond to instances running on that compute node. The
|
|
||||||
files inside are related to one of the files in the ``_base`` directory.
|
|
||||||
They're essentially differential-based files containing only the changes
|
|
||||||
made from the original ``_base`` directory.
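You can see this relationship with ``qemu-img``, which reports the ``_base``
file that an instance disk is layered on top of. The instance path reuses the
example from earlier in this chapter, and the output is abbreviated with a
placeholder for the image hash:

.. code-block:: console

   # qemu-img info /var/lib/nova/instances/instance-0000274a/disk | grep backing
   backing file: /var/lib/nova/instances/_base/<image hash>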
|
|
||||||
|
|
||||||
All files and directories in ``/var/lib/nova/instances`` are uniquely
|
|
||||||
named. The files in ``_base`` are uniquely titled for the glance image that
|
|
||||||
they are based on, and the directory names ``instance-xxxxxxxx`` are
|
|
||||||
uniquely titled for that particular instance. For example, if you copy
|
|
||||||
all data from ``/var/lib/nova/instances`` on one compute node to
|
|
||||||
another, you do not overwrite any files or cause any damage to images
|
|
||||||
that have the same unique name, because they are essentially the same
|
|
||||||
file.
|
|
||||||
|
|
||||||
Although this method is not documented or supported, you can use it when
|
|
||||||
your compute node is permanently offline but you have instances locally
|
|
||||||
stored on it.
|
|
@ -1,29 +0,0 @@
|
|||||||
========================
|
|
||||||
Configuration Management
|
|
||||||
========================
|
|
||||||
|
|
||||||
Maintaining an OpenStack cloud requires that you manage multiple
|
|
||||||
physical servers, and this number might grow over time. Because managing
|
|
||||||
nodes manually is error prone, we strongly recommend that you use a
|
|
||||||
configuration-management tool. These tools automate the process of
|
|
||||||
ensuring that all your nodes are configured properly and encourage you
|
|
||||||
to maintain your configuration information (such as packages and
|
|
||||||
configuration options) in a version-controlled repository.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Several configuration-management tools are available, and this guide does
|
|
||||||
not recommend a specific one. The most popular ones in the OpenStack
|
|
||||||
community are:
|
|
||||||
|
|
||||||
* `Puppet <https://puppetlabs.com/>`_, with available `OpenStack
|
|
||||||
Puppet modules <https://github.com/puppetlabs/puppetlabs-openstack>`_
|
|
||||||
* `Ansible <https://www.ansible.com/>`_, with `OpenStack Ansible
|
|
||||||
<https://github.com/openstack/openstack-ansible>`_
|
|
||||||
* `Chef <http://www.getchef.com/chef/>`_, with available `OpenStack Chef
|
|
||||||
recipes <https://github.com/openstack/openstack-chef-repo>`_
|
|
||||||
|
|
||||||
Other newer configuration tools include `Juju <https://juju.ubuntu.com/>`_
|
|
||||||
and `Salt <http://www.saltstack.com/>`_; and more mature configuration
|
|
||||||
management tools include `CFEngine <http://cfengine.com/>`_ and `Bcfg2
|
|
||||||
<http://bcfg2.org/>`_.
|
|
@ -1,96 +0,0 @@
|
|||||||
===========================================================
|
|
||||||
Cloud Controller and Storage Proxy Failures and Maintenance
|
|
||||||
===========================================================
|
|
||||||
|
|
||||||
The cloud controller and storage proxy are very similar to each other
|
|
||||||
when it comes to expected and unexpected downtime. One of each server
|
|
||||||
type typically runs in the cloud, which makes them very noticeable when
|
|
||||||
they are not running.
|
|
||||||
|
|
||||||
For the cloud controller, the good news is if your cloud is using the
|
|
||||||
FlatDHCP multi-host HA network mode, existing instances and volumes
|
|
||||||
continue to operate while the cloud controller is offline. For the
|
|
||||||
storage proxy, however, no storage traffic is possible until it is back
|
|
||||||
up and running.
|
|
||||||
|
|
||||||
Planned Maintenance
|
|
||||||
~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
One way to plan for cloud controller or storage proxy maintenance is to
|
|
||||||
simply do it off-hours, such as at 1 a.m. or 2 a.m. This strategy
|
|
||||||
affects fewer users. If your cloud controller or storage proxy is too
|
|
||||||
important to have unavailable at any point in time, you must look into
|
|
||||||
high-availability options.
|
|
||||||
|
|
||||||
Rebooting a Cloud Controller or Storage Proxy
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
All in all, just issue the :command:`reboot` command. The operating system
|
|
||||||
cleanly shuts down services and then automatically reboots. If you want
|
|
||||||
to be very thorough, run your backup jobs just before you
|
|
||||||
reboot.
|
|
||||||
|
|
||||||
After a cloud controller reboots, ensure that all required services were
|
|
||||||
successfully started. The following commands use :command:`ps` and
|
|
||||||
:command:`grep` to determine if nova, glance, and keystone are currently
|
|
||||||
running:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# ps aux | grep nova-
|
|
||||||
# ps aux | grep glance-
|
|
||||||
# ps aux | grep keystone
|
|
||||||
# ps aux | grep cinder
|
|
||||||
|
|
||||||
Also check that all services are functioning. The following set of
|
|
||||||
commands sources the ``openrc`` file, then runs some basic glance, nova,
|
|
||||||
and openstack commands. If the commands work as expected, you can be
|
|
||||||
confident that those services are in working condition:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# . openrc
|
|
||||||
# openstack image list
|
|
||||||
# openstack server list
|
|
||||||
# openstack project list
|
|
||||||
|
|
||||||
For the storage proxy, ensure that the :term:`Object Storage service <Object
|
|
||||||
Storage service (swift)>` has resumed:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# ps aux | grep swift
|
|
||||||
|
|
||||||
Also check that it is functioning:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# swift stat
|
|
||||||
|
|
||||||
Total Cloud Controller Failure
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The cloud controller could completely fail if, for example, its
|
|
||||||
motherboard goes bad. Users will immediately notice the loss of a cloud
|
|
||||||
controller since it provides core functionality to your cloud
|
|
||||||
environment. If your infrastructure monitoring does not alert you that
|
|
||||||
your cloud controller has failed, your users definitely will.
|
|
||||||
Unfortunately, this is a rough situation. The cloud controller is an
|
|
||||||
integral part of your cloud. If you have only one controller, you will
|
|
||||||
have many missing services if it goes down.
|
|
||||||
|
|
||||||
To avoid this situation, create a highly available cloud controller
|
|
||||||
cluster. This is outside the scope of this document, but you can read
|
|
||||||
more in the `OpenStack High Availability
|
|
||||||
Guide <https://docs.openstack.org/ha-guide/index.html>`_.
|
|
||||||
|
|
||||||
The next best approach is to use a configuration-management tool, such
|
|
||||||
as Puppet, to automatically build a cloud controller. This should not
|
|
||||||
take more than 15 minutes if you have a spare server available. After
|
|
||||||
the controller rebuilds, restore any backups taken
|
|
||||||
(see :doc:`ops-backup-recovery`).
|
|
||||||
|
|
||||||
Also, in practice, the ``nova-compute`` services on the compute nodes do
|
|
||||||
not always reconnect cleanly to rabbitmq hosted on the controller when
|
|
||||||
it comes back up after a long reboot; a restart of the nova services on
the compute nodes is required.
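A minimal sketch of that restart, assuming a sysvinit- or upstart-based
compute node like the examples elsewhere in this chapter, followed by a check
that the service reconnected to the message broker:

.. code-block:: console

   # service nova-compute restart
   # grep AMQP /var/log/nova/nova-compute.log | tail -1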
|
|
@ -1,51 +0,0 @@
|
|||||||
=========
|
|
||||||
Databases
|
|
||||||
=========
|
|
||||||
|
|
||||||
Almost all OpenStack components have an underlying database to store
|
|
||||||
persistent information. Usually this database is MySQL. Normal MySQL
|
|
||||||
administration is applicable to these databases. OpenStack does not
|
|
||||||
configure the databases out of the ordinary. Basic administration
|
|
||||||
includes performance tweaking, high availability, backup, recovery, and
|
|
||||||
repairing. For more information, see a standard MySQL administration guide.
|
|
||||||
|
|
||||||
You can perform a couple of tricks with the database to either more
|
|
||||||
quickly retrieve information or fix a data inconsistency error—for
|
|
||||||
example, an instance was terminated, but the status was not updated in
|
|
||||||
the database. These tricks are discussed throughout this book.
|
|
||||||
|
|
||||||
Database Connectivity
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Review the component's configuration file to see how each OpenStack component
|
|
||||||
accesses its corresponding database. Look for a ``connection`` option. The
|
|
||||||
following command uses ``grep`` to display the SQL connection string for nova,
|
|
||||||
glance, cinder, and keystone:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# grep -hE "connection ?=" \
|
|
||||||
/etc/nova/nova.conf /etc/glance/glance-*.conf \
|
|
||||||
/etc/cinder/cinder.conf /etc/keystone/keystone.conf \
|
|
||||||
/etc/neutron/neutron.conf
|
|
||||||
connection = mysql+pymysql://nova:password@cloud.example.com/nova
|
|
||||||
connection = mysql+pymysql://glance:password@cloud.example.com/glance
|
|
||||||
connection = mysql+pymysql://glance:password@cloud.example.com/glance
|
|
||||||
connection = mysql+pymysql://cinder:password@cloud.example.com/cinder
|
|
||||||
connection = mysql+pymysql://keystone:password@cloud.example.com/keystone
|
|
||||||
connection = mysql+pymysql://neutron:password@cloud.example.com/neutron
|
|
||||||
|
|
||||||
The connection strings take this format:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
mysql+pymysql:// <username> : <password> @ <hostname> / <database name>
|
|
||||||
|
|
||||||
Performance and Optimizing
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
As your cloud grows, MySQL is utilized more and more. If you suspect
|
|
||||||
that MySQL might be becoming a bottleneck, you should start researching
|
|
||||||
MySQL optimization. The MySQL manual has an entire section dedicated to
|
|
||||||
this topic: `Optimization Overview
|
|
||||||
<http://dev.mysql.com/doc/refman/5.5/en/optimize-overview.html>`_.
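Before any deep tuning, it can help to glance at a few basic server counters.
For example, with placeholder credentials:

.. code-block:: console

   # mysql -u root -p -e "SHOW GLOBAL STATUS LIKE 'Slow_queries';"
   # mysql -u root -p -e "SHOW GLOBAL STATUS LIKE 'Threads_connected';"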
|
|
@ -1,92 +0,0 @@
|
|||||||
=====================================
|
|
||||||
Determining Which Component Is Broken
|
|
||||||
=====================================
|
|
||||||
|
|
||||||
OpenStack's collection of different components interact with each other
|
|
||||||
strongly. For example, uploading an image requires interaction from
|
|
||||||
``nova-api``, ``glance-api``, ``glance-registry``, keystone, and
|
|
||||||
potentially ``swift-proxy``. As a result, it is sometimes difficult to
determine exactly where problems lie. The purpose of this section is to
help you do exactly that.
|
|
||||||
|
|
||||||
Tailing Logs
|
|
||||||
~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The first place to look is the log file related to the command you are
|
|
||||||
trying to run. For example, if ``openstack server list`` is failing, try
|
|
||||||
tailing a nova log file and running the command again:
|
|
||||||
|
|
||||||
Terminal 1:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# tail -f /var/log/nova/nova-api.log
|
|
||||||
|
|
||||||
Terminal 2:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server list
|
|
||||||
|
|
||||||
Look for any errors or traces in the log file. For more information, see
|
|
||||||
:doc:`ops-logging-monitoring`.
|
|
||||||
|
|
||||||
If the error indicates that the problem is with another component,
|
|
||||||
switch to tailing that component's log file. For example, if nova cannot
|
|
||||||
access glance, look at the ``glance-api`` log:
|
|
||||||
|
|
||||||
Terminal 1:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# tail -f /var/log/glance/api.log
|
|
||||||
|
|
||||||
Terminal 2:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack server list
|
|
||||||
|
|
||||||
Wash, rinse, and repeat until you find the core cause of the problem.
|
|
||||||
|
|
||||||
Running Daemons on the CLI
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Unfortunately, sometimes the error is not apparent from the log files.
|
|
||||||
In this case, switch tactics and use a different command; maybe run the
|
|
||||||
service directly on the command line. For example, if the ``glance-api``
|
|
||||||
service refuses to start and stay running, try launching the daemon from
|
|
||||||
the command line:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# sudo -u glance -H glance-api
|
|
||||||
|
|
||||||
This might print the error and cause of the problem.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
The ``-H`` flag is required when running the daemons with sudo
|
|
||||||
because some daemons will write files relative to the user's home
|
|
||||||
directory, and this write may fail if ``-H`` is left off.
|
|
||||||
|
|
||||||
.. Tip::
|
|
||||||
|
|
||||||
**Example of Complexity**
|
|
||||||
|
|
||||||
One morning, a compute node failed to run any instances. The log files
|
|
||||||
were a bit vague, claiming that a certain instance was unable to be
|
|
||||||
started. This ended up being a red herring because the instance was
|
|
||||||
simply the first instance in alphabetical order, so it was the first
|
|
||||||
instance that ``nova-compute`` would touch.
|
|
||||||
|
|
||||||
Further troubleshooting showed that libvirt was not running at all. This
|
|
||||||
made more sense. If libvirt wasn't running, then no instance could be
|
|
||||||
virtualized through KVM. Upon trying to start libvirt, it would silently
|
|
||||||
die immediately. The libvirt logs did not explain why.
|
|
||||||
|
|
||||||
Next, the ``libvirtd`` daemon was run on the command line. Finally a
|
|
||||||
helpful error message: it could not connect to d-bus. As ridiculous as
|
|
||||||
it sounds, libvirt, and thus ``nova-compute``, relies on d-bus and
|
|
||||||
somehow d-bus crashed. Simply starting d-bus set the entire chain back
|
|
||||||
on track, and soon everything was back up and running.
|
|
@ -1,64 +0,0 @@
|
|||||||
=====================
|
|
||||||
Working with Hardware
|
|
||||||
=====================
|
|
||||||
|
|
||||||
As for your initial deployment, you should ensure that all hardware is
|
|
||||||
appropriately burned in before adding it to production. Run software
|
|
||||||
that uses the hardware to its limits—maxing out RAM, CPU, disk, and
|
|
||||||
network. Many options are available, and normally double as benchmark
|
|
||||||
software, so you also get a good idea of the performance of your
|
|
||||||
system.
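As one example of such a burn-in run, the ``stress-ng`` tool (an assumed
choice; any equivalent load generator works) can exercise CPU, memory, and
disk for a fixed period:

.. code-block:: console

   # stress-ng --cpu 0 --vm 2 --vm-bytes 75% --hdd 2 --timeout 24h --metrics-brief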
|
|
||||||
|
|
||||||
Adding a Compute Node
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If you find that you have reached or are reaching the capacity limit of
|
|
||||||
your computing resources, you should plan to add additional compute
|
|
||||||
nodes. Adding more nodes is quite easy. The process for adding compute
|
|
||||||
nodes is the same as when the initial compute nodes were deployed to
|
|
||||||
your cloud: use an automated deployment system to bootstrap the
|
|
||||||
bare-metal server with the operating system and then have a
|
|
||||||
configuration-management system install and configure OpenStack Compute.
|
|
||||||
Once the Compute service has been installed and configured in the same
|
|
||||||
way as the other compute nodes, it automatically attaches itself to the
|
|
||||||
cloud. The cloud controller notices the new node(s) and begins
|
|
||||||
scheduling instances to launch there.
|
|
||||||
|
|
||||||
If your OpenStack Block Storage nodes are separate from your compute
|
|
||||||
nodes, the same procedure still applies because the same queuing and
|
|
||||||
polling system is used in both services.
|
|
||||||
|
|
||||||
We recommend that you use the same hardware for new compute and block
|
|
||||||
storage nodes. At the very least, ensure that the CPUs are similar in
|
|
||||||
the compute nodes to not break live migration.
|
|
||||||
|
|
||||||
Adding an Object Storage Node
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Adding a new object storage node is different from adding compute or
|
|
||||||
block storage nodes. You still want to initially configure the server by
|
|
||||||
using your automated deployment and configuration-management systems.
|
|
||||||
After that is done, you need to add the local disks of the object
|
|
||||||
storage node into the object storage ring. The exact command to do this
|
|
||||||
is the same command that was used to add the initial disks to the ring.
|
|
||||||
Simply rerun this command on the object storage proxy server for all
|
|
||||||
disks on the new object storage node. Once this has been done, rebalance
|
|
||||||
the ring and copy the resulting ring files to the other storage nodes.
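For illustration, adding a single disk and rebalancing might look like the
following sketch; the builder file, region, zone, IP address, device, and
weight are placeholders that must match your environment:

.. code-block:: console

   # swift-ring-builder object.builder add r1z1-10.0.0.51:6000/sdb1 100
   # swift-ring-builder object.builder rebalance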
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
If your new object storage node has a different number of disks than
|
|
||||||
the original nodes have, the command to add the new node is
|
|
||||||
different from the original commands. These parameters vary from
|
|
||||||
environment to environment.
|
|
||||||
|
|
||||||
Replacing Components
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Failures of hardware are common in large-scale deployments such as an
|
|
||||||
infrastructure cloud. Consider your processes and balance time saving
|
|
||||||
against availability. For example, an Object Storage cluster can easily
|
|
||||||
live with dead disks in it for some period of time if it has sufficient
|
|
||||||
capacity. Or, if your compute installation is not full, you could
|
|
||||||
consider live migrating instances off a host with a RAM failure until
|
|
||||||
you have time to deal with the problem.
|
|
@ -1,54 +0,0 @@
|
|||||||
=====
|
|
||||||
HDWMY
|
|
||||||
=====
|
|
||||||
|
|
||||||
Here's a quick list of various to-do items for each hour, day, week,
|
|
||||||
month, and year. Please note that these tasks are neither required nor
|
|
||||||
definitive but helpful ideas:
|
|
||||||
|
|
||||||
Hourly
|
|
||||||
~~~~~~
|
|
||||||
|
|
||||||
* Check your monitoring system for alerts and act on them.
|
|
||||||
* Check your ticket queue for new tickets.
|
|
||||||
|
|
||||||
Daily
|
|
||||||
~~~~~
|
|
||||||
|
|
||||||
* Check for instances in a failed or weird state and investigate why.
|
|
||||||
* Check for security patches and apply them as needed.
|
|
||||||
|
|
||||||
Weekly
|
|
||||||
~~~~~~
|
|
||||||
|
|
||||||
* Check cloud usage:
|
|
||||||
|
|
||||||
* User quotas
|
|
||||||
* Disk space
|
|
||||||
* Image usage
|
|
||||||
* Large instances
|
|
||||||
* Network usage (bandwidth and IP usage)
|
|
||||||
|
|
||||||
* Verify your alert mechanisms are still working.
|
|
||||||
|
|
||||||
Monthly
|
|
||||||
~~~~~~~
|
|
||||||
|
|
||||||
* Check usage and trends over the past month.
|
|
||||||
* Check for user accounts that should be removed.
|
|
||||||
* Check for operator accounts that should be removed.
|
|
||||||
|
|
||||||
Quarterly
|
|
||||||
~~~~~~~~~
|
|
||||||
|
|
||||||
* Review usage and trends over the past quarter.
|
|
||||||
* Prepare any quarterly reports on usage and statistics.
|
|
||||||
* Review and plan any necessary cloud additions.
|
|
||||||
* Review and plan any major OpenStack upgrades.
|
|
||||||
|
|
||||||
Semiannually
|
|
||||||
~~~~~~~~~~~~
|
|
||||||
|
|
||||||
* Upgrade OpenStack.
|
|
||||||
* Clean up after an OpenStack upgrade (any unused or new services to be
|
|
||||||
aware of?).
|
|
@ -1,148 +0,0 @@
|
|||||||
========================
|
|
||||||
RabbitMQ troubleshooting
|
|
||||||
========================
|
|
||||||
|
|
||||||
This section provides tips on resolving common RabbitMQ issues.
|
|
||||||
|
|
||||||
RabbitMQ service hangs
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
It is quite common for the RabbitMQ service to hang when it is
|
|
||||||
restarted or stopped. Therefore, it is highly recommended that
|
|
||||||
you manually restart RabbitMQ on each controller node.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
The RabbitMQ service name may vary depending on your operating
|
|
||||||
system or vendor who supplies your RabbitMQ service.
|
|
||||||
|
|
||||||
#. Restart the RabbitMQ service on the first controller node. The
|
|
||||||
:command:`service rabbitmq-server restart` command may not work
|
|
||||||
in certain situations, so it is best to use:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# service rabbitmq-server stop
|
|
||||||
# service rabbitmq-server start
|
|
||||||
|
|
||||||
|
|
||||||
#. If the service refuses to stop, then run the :command:`pkill` command
|
|
||||||
to stop the service, then restart the service:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# pkill -KILL -u rabbitmq
|
|
||||||
# service rabbitmq-server start
|
|
||||||
|
|
||||||
#. Verify RabbitMQ processes are running:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# ps -ef | grep rabbitmq
|
|
||||||
# rabbitmqctl list_queues
|
|
||||||
# rabbitmqctl list_queues 2>&1 | grep -i error
|
|
||||||
|
|
||||||
#. If there are errors, run the :command:`cluster_status` command to make sure
|
|
||||||
there are no partitions:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl cluster_status
|
|
||||||
|
|
||||||
For more information, see `RabbitMQ documentation
|
|
||||||
<https://www.rabbitmq.com/partitions.html>`_.
|
|
||||||
|
|
||||||
#. Go back to the first step and try restarting the RabbitMQ service again. If
|
|
||||||
you still have errors, remove the contents in the
|
|
||||||
``/var/lib/rabbitmq/mnesia/`` directory between stopping and starting the
|
|
||||||
RabbitMQ service.
|
|
||||||
|
|
||||||
#. If there are no errors, restart the RabbitMQ service on the next controller
|
|
||||||
node.
|
|
||||||
|
|
||||||
Since the Liberty release, OpenStack services will automatically recover from
|
|
||||||
a RabbitMQ outage. You should only consider restarting OpenStack services
|
|
||||||
after checking if RabbitMQ heartbeat functionality is enabled, and if
|
|
||||||
OpenStack services are not picking up messages from RabbitMQ queues.
|
|
||||||
|
|
||||||
RabbitMQ alerts
|
|
||||||
~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If you receive alerts for RabbitMQ, take the following steps to troubleshoot
|
|
||||||
and resolve the issue:
|
|
||||||
|
|
||||||
#. Determine which servers the RabbitMQ alarms are coming from.
|
|
||||||
#. Attempt to boot a nova instance in the affected environment.
|
|
||||||
#. If you cannot launch an instance, continue to troubleshoot the issue.
|
|
||||||
#. Log in to each of the controller nodes for the affected environment, and
|
|
||||||
check the ``/var/log/rabbitmq`` log files for any reported issues.
|
|
||||||
#. Look for connection issues identified in the log files.
|
|
||||||
#. For each controller node in your environment, view the ``/etc/init.d``
   directory to check that it contains nova*, cinder*, neutron*, or glance*
   services. Also check for RabbitMQ message queues that are growing without
   being consumed, which indicates which OpenStack service is affected.
   Restart the affected OpenStack service.
#. For each compute node in your environment, view the ``/etc/init.d``
   directory and check whether it contains nova*, cinder*, neutron*, or
   glance* services. Also check for RabbitMQ message queues that are growing
   without being consumed, which indicates which OpenStack services are
   affected. Restart the affected OpenStack services.
|
|
||||||
#. Open OpenStack Dashboard and launch an instance. If the instance launches,
|
|
||||||
the issue is resolved.
|
|
||||||
#. If you cannot launch an instance, check the ``/var/log/rabbitmq`` log
|
|
||||||
files for reported connection issues.
|
|
||||||
#. Restart the RabbitMQ service on all of the controller nodes:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# service rabbitmq-server stop
|
|
||||||
# service rabbitmq-server start
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
This step applies if you have already restarted only the OpenStack components, and
|
|
||||||
cannot connect to the RabbitMQ service.
|
|
||||||
|
|
||||||
#. Repeat steps 7-8.
|
|
||||||
|
|
||||||
Excessive database management memory consumption
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Since the Liberty release, OpenStack with RabbitMQ 3.4.x or 3.6.x has an issue
|
|
||||||
with the management database consuming the memory allocated to RabbitMQ.
|
|
||||||
This is caused by statistics collection and processing. When a single node
|
|
||||||
with RabbitMQ reaches its memory threshold, all exchange and queue processing
|
|
||||||
is halted until the memory alarm recovers.
|
|
||||||
|
|
||||||
To address this issue:
|
|
||||||
|
|
||||||
#. Check memory consumption:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl status
|
|
||||||
|
|
||||||
#. Edit the ``/etc/rabbitmq/rabbitmq.config`` configuration file, and set
   the ``collect_statistics_interval`` parameter to a value between 30000 and
   60000 milliseconds. Alternatively, you can turn off statistics collection
   by setting the ``collect_statistics`` parameter to ``none``.
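A sketch of the relevant stanza in the classic Erlang-term configuration
format; adjust the value and file layout to your own deployment:

.. code-block:: console

   # cat /etc/rabbitmq/rabbitmq.config
   [
     {rabbit, [
       {collect_statistics_interval, 30000}
     ]}
   ].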
|
|
||||||
|
|
||||||
File descriptor limits when scaling a cloud environment
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
A cloud environment that is scaled to a certain size will require the file
|
|
||||||
descriptor limits to be adjusted.
|
|
||||||
|
|
||||||
Run the :command:`rabbitmqctl status` command to view the current file
descriptor limits:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
"{file_descriptors,
|
|
||||||
[{total_limit,3996},
|
|
||||||
{total_used,135},
|
|
||||||
{sockets_limit,3594},
|
|
||||||
{sockets_used,133}]},"
|
|
||||||
|
|
||||||
Adjust the appropriate limits in the
|
|
||||||
``/etc/security/limits.conf`` configuration file.
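For example, raising the open-file limit for the ``rabbitmq`` user might look
like this; the value shown is only an illustration:

.. code-block:: console

   # grep rabbitmq /etc/security/limits.conf
   rabbitmq    soft    nofile    65536
   rabbitmq    hard    nofile    65536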
|
|
@ -1,92 +0,0 @@
|
|||||||
=========================================
|
|
||||||
What to do when things are running slowly
|
|
||||||
=========================================
|
|
||||||
|
|
||||||
When you are getting slow responses from various services, it can be
|
|
||||||
hard to know where to start looking. The first thing to check is the
|
|
||||||
extent of the slowness: is it specific to a single service, or varied
|
|
||||||
among different services? If your problem is isolated to a specific
|
|
||||||
service, it can temporarily be fixed by restarting the service, but that
|
|
||||||
is often only a fix for the symptom and not the actual problem.
|
|
||||||
|
|
||||||
This is a collection of ideas from experienced operators on common
|
|
||||||
things to look at that may be the cause of slowness. It is not, however,
|
|
||||||
designed to be an exhaustive list.
|
|
||||||
|
|
||||||
OpenStack Identity service
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If OpenStack :term:`Identity service <Identity service (keystone)>` is
|
|
||||||
responding slowly, it could be due to the token table getting large.
|
|
||||||
This can be fixed by running the :command:`keystone-manage token_flush`
|
|
||||||
command.
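Flushing can be run ad hoc or scheduled. One common sketch is a periodic cron
entry under the keystone user; the hourly schedule here is only an example:

.. code-block:: console

   # crontab -u keystone -l
   @hourly keystone-manage token_flush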
|
|
||||||
|
|
||||||
Additionally, for Identity-related issues, try the tips
|
|
||||||
in :ref:`sql_backend`.
|
|
||||||
|
|
||||||
OpenStack Image service
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
OpenStack :term:`Image service <Image service (glance)>` can be slowed down
|
|
||||||
by things related to the Identity service, but the Image service itself can be
|
|
||||||
slowed down if connectivity to the back-end storage in use is slow or otherwise
|
|
||||||
problematic. For example, your back-end NFS server might have gone down.
|
|
||||||
|
|
||||||
OpenStack Block Storage service
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
OpenStack :term:`Block Storage service <Block Storage service (cinder)>` is
|
|
||||||
similar to the Image service, so start by checking Identity-related services,
|
|
||||||
and the back-end storage.
|
|
||||||
Additionally, both the Block Storage and Image services rely on AMQP and
|
|
||||||
SQL functionality, so consider these when debugging.
|
|
||||||
|
|
||||||
OpenStack Compute service
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Services related to OpenStack Compute are normally fairly fast and rely
|
|
||||||
on a couple of back-end services: Identity for authentication and
authorization, and AMQP for interoperability. Any slowness in Compute is
usually traceable to one of these. Also, as with all other
|
|
||||||
services, SQL is used extensively.
|
|
||||||
|
|
||||||
OpenStack Networking service
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Slowness in the OpenStack :term:`Networking service <Networking service
|
|
||||||
(neutron)>` can be caused by services that it relies upon, but it can
|
|
||||||
also be related to either physical or virtual networking. For example:
|
|
||||||
network namespaces that do not exist or are not tied to interfaces correctly;
|
|
||||||
DHCP daemons that have hung or are not running; a cable being physically
|
|
||||||
disconnected; a switch not being configured correctly. When debugging
|
|
||||||
Networking service problems, begin by verifying all physical networking
|
|
||||||
functionality (switch configuration, physical cabling, etc.). After the
|
|
||||||
physical networking is verified, check to be sure all of the Networking
|
|
||||||
services are running (neutron-server, neutron-dhcp-agent, etc.), then check
|
|
||||||
on AMQP and SQL back ends.
|
|
||||||
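A few quick, illustrative checks can help narrow the problem down; the
network ID shown here is a placeholder, and the exact agent names depend on
your plug-in:

.. code-block:: console

   # ip netns list
   # neutron agent-list
   # ip netns exec qdhcp-<network ID> ip addr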
|
|
||||||
AMQP broker
|
|
||||||
~~~~~~~~~~~
|
|
||||||
|
|
||||||
Regardless of which AMQP broker you use, such as RabbitMQ, there are
|
|
||||||
common issues which not only slow down operations, but can also cause
|
|
||||||
real problems. Sometimes messages queued for services stay on the queues
|
|
||||||
and are not consumed. This can be due to dead or stagnant services and
can commonly be cleared up by restarting either the AMQP-related
services or the OpenStack service in question. A quick way to check for
a queue backlog is sketched below.
|
|
||||||
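With RabbitMQ, for example, the queues with the largest backlog can be listed
as follows; this is only one quick way of spotting unconsumed messages:

.. code-block:: console

   # rabbitmqctl list_queues name messages consumers | sort -n -k 2 | tail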
|
|
||||||
.. _sql_backend:
|
|
||||||
|
|
||||||
SQL back end
|
|
||||||
~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Whether you use SQLite or an RDBMS (such as MySQL), SQL interoperability
|
|
||||||
is essential to a functioning OpenStack environment. A large or
|
|
||||||
fragmented SQLite file can cause slowness when using files as a back
|
|
||||||
end. A locked or long-running query can cause delays for most RDBMS
|
|
||||||
services. In this case, do not kill the query immediately, but look into
|
|
||||||
it to see if it is a problem with something that is hung, or something
|
|
||||||
that is just taking a long time to run and needs to finish on its own.
|
|
||||||
The administration of an RDBMS is outside the scope of this document,
|
|
||||||
but it should be noted that a properly functioning RDBMS is essential to
|
|
||||||
most OpenStack services.
|
|
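With MySQL, for example, the currently running queries and how long they have
been executing can be inspected before deciding what, if anything, to do
about them:

.. code-block:: mysql

   mysql> SHOW FULL PROCESSLIST;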
@ -1,91 +0,0 @@
|
|||||||
=====================================
|
|
||||||
Storage Node Failures and Maintenance
|
|
||||||
=====================================
|
|
||||||
|
|
||||||
Because of the high redundancy of Object Storage, dealing with object
|
|
||||||
storage node issues is a lot easier than dealing with compute node
|
|
||||||
issues.
|
|
||||||
|
|
||||||
Rebooting a Storage Node
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If a storage node requires a reboot, simply reboot it. Requests for data
|
|
||||||
hosted on that node are redirected to other copies while the server is
|
|
||||||
rebooting.
|
|
||||||
|
|
||||||
Shutting Down a Storage Node
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If you need to shut down a storage node for an extended period of time
|
|
||||||
(one or more days), consider removing the node from the storage ring.
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# swift-ring-builder account.builder remove <ip address of storage node>
|
|
||||||
# swift-ring-builder container.builder remove <ip address of storage node>
|
|
||||||
# swift-ring-builder object.builder remove <ip address of storage node>
|
|
||||||
# swift-ring-builder account.builder rebalance
|
|
||||||
# swift-ring-builder container.builder rebalance
|
|
||||||
# swift-ring-builder object.builder rebalance
|
|
||||||
|
|
||||||
Next, redistribute the ring files to the other nodes:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# for i in s01.example.com s02.example.com s03.example.com
|
|
||||||
> do
|
|
||||||
> scp *.ring.gz $i:/etc/swift
|
|
||||||
> done
|
|
||||||
|
|
||||||
These actions effectively take the storage node out of the storage
|
|
||||||
cluster.
|
|
||||||
|
|
||||||
When the node is able to rejoin the cluster, just add it back to the
|
|
||||||
ring. The exact syntax you use to add a node to your swift cluster with
|
|
||||||
``swift-ring-builder`` depends heavily on the options used when you
originally created your cluster. Please refer back to those commands;
a minimal sketch of the general form is shown below.
|
|
||||||
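As a rough sketch only, re-adding a node generally follows the pattern below,
repeated for the ``account.builder`` and ``container.builder`` files and
followed by redistributing the ring files as before; the region, zone, port,
device, and weight values are placeholders that must match how your ring was
originally built:

.. code-block:: console

   # swift-ring-builder object.builder add r1z1-<ip address of storage node>:<port>/<device name> <weight>
   # swift-ring-builder object.builder rebalance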
|
|
||||||
Replacing a Swift Disk
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
If a hard drive fails in an Object Storage node, replacing it is
|
|
||||||
relatively easy. This assumes that your Object Storage environment is
|
|
||||||
configured correctly, where the data that is stored on the failed drive
|
|
||||||
is also replicated to other drives in the Object Storage environment.
|
|
||||||
|
|
||||||
This example assumes that ``/dev/sdb`` has failed.
|
|
||||||
|
|
||||||
First, unmount the disk:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# umount /dev/sdb
|
|
||||||
|
|
||||||
Next, physically remove the disk from the server and replace it with a
|
|
||||||
working disk.
|
|
||||||
|
|
||||||
Ensure that the operating system has recognized the new disk:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# dmesg | tail
|
|
||||||
|
|
||||||
You should see a message about ``/dev/sdb``.
|
|
||||||
|
|
||||||
Because it is recommended not to use partitions on a swift disk, simply
|
|
||||||
format the disk as a whole:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mkfs.xfs /dev/sdb
|
|
||||||
|
|
||||||
Finally, mount the disk:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mount -a
|
|
||||||
|
|
||||||
Swift should notice the new disk and that no data exists. It then begins
|
|
||||||
replicating the data to the disk from the other existing replicas.
|
|
@ -1,23 +0,0 @@
|
|||||||
====================================
|
|
||||||
Maintenance, Failures, and Debugging
|
|
||||||
====================================
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
|
|
||||||
ops-maintenance-controller.rst
|
|
||||||
ops-maintenance-compute.rst
|
|
||||||
ops-maintenance-storage.rst
|
|
||||||
ops-maintenance-complete.rst
|
|
||||||
ops-maintenance-configuration.rst
|
|
||||||
ops-maintenance-hardware.rst
|
|
||||||
ops-maintenance-database.rst
|
|
||||||
ops-maintenance-rabbitmq.rst
|
|
||||||
ops-maintenance-hdmwy.rst
|
|
||||||
ops-maintenance-determine.rst
|
|
||||||
ops-maintenance-slow.rst
|
|
||||||
ops-uninstall.rst
|
|
||||||
|
|
||||||
Downtime, whether planned or unscheduled, is a certainty when running a
|
|
||||||
cloud. This chapter aims to provide useful information for dealing
|
|
||||||
proactively, or reactively, with these occurrences.
|
|
@ -1,437 +0,0 @@
|
|||||||
==========
|
|
||||||
Monitoring
|
|
||||||
==========
|
|
||||||
|
|
||||||
There are two types of monitoring: watching for problems and watching
|
|
||||||
usage trends. The former ensures that all services are up and running,
|
|
||||||
creating a functional cloud. The latter involves monitoring resource
|
|
||||||
usage over time in order to make informed decisions about potential
|
|
||||||
bottlenecks and upgrades.
|
|
||||||
|
|
||||||
Process Monitoring
|
|
||||||
~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
A basic type of alert monitoring is to simply check whether a
|
|
||||||
required process is running. For example, ensure that
|
|
||||||
the ``nova-api`` service is running on the cloud controller:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# ps aux | grep nova-api
|
|
||||||
nova 12786 0.0 0.0 37952 1312 ? Ss Feb11 0:00 su -s /bin/sh -c exec nova-api
|
|
||||||
--config-file=/etc/nova/nova.conf nova
|
|
||||||
nova 12787 0.0 0.1 135764 57400 ? S Feb11 0:01 /usr/bin/python
|
|
||||||
/usr/bin/nova-api --config-file=/etc/nova/nova.conf
|
|
||||||
nova 12792 0.0 0.0 96052 22856 ? S Feb11 0:01 /usr/bin/python
|
|
||||||
/usr/bin/nova-api --config-file=/etc/nova/nova.conf
|
|
||||||
nova 12793 0.0 0.3 290688 115516 ? S Feb11 1:23 /usr/bin/python
|
|
||||||
/usr/bin/nova-api --config-file=/etc/nova/nova.conf
|
|
||||||
nova 12794 0.0 0.2 248636 77068 ? S Feb11 0:04 /usr/bin/python
|
|
||||||
/usr/bin/nova-api --config-file=/etc/nova/nova.conf
|
|
||||||
root 24121 0.0 0.0 11688 912 pts/5 S+ 13:07 0:00 grep nova-api
|
|
||||||
|
|
||||||
|
|
||||||
The OpenStack processes that should be monitored depend on the specific
|
|
||||||
configuration of the environment, but can include:
|
|
||||||
|
|
||||||
**Compute service (nova)**
|
|
||||||
|
|
||||||
* nova-api
|
|
||||||
* nova-scheduler
|
|
||||||
* nova-conductor
|
|
||||||
* nova-novncproxy
|
|
||||||
* nova-compute
|
|
||||||
|
|
||||||
**Block Storage service (cinder)**
|
|
||||||
|
|
||||||
* cinder-volume
|
|
||||||
* cinder-api
|
|
||||||
* cinder-scheduler
|
|
||||||
|
|
||||||
**Networking service (neutron)**
|
|
||||||
|
|
||||||
* neutron-api
|
|
||||||
* neutron-server
|
|
||||||
* neutron-openvswitch-agent
|
|
||||||
* neutron-dhcp-agent
|
|
||||||
* neutron-l3-agent
|
|
||||||
* neutron-metadata-agent
|
|
||||||
|
|
||||||
**Image service (glance)**
|
|
||||||
|
|
||||||
* glance-api
|
|
||||||
* glance-registry
|
|
||||||
|
|
||||||
**Identity service (keystone)**
|
|
||||||
|
|
||||||
The keystone processes are run within Apache as WSGI applications.
|
|
||||||
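Because of this, a quick manual check looks at the web server rather than at
a standalone keystone process. For example, assuming a systemd-based
distribution and an Identity endpoint reachable at ``controller:5000``:

.. code-block:: console

   # systemctl status apache2            # httpd on RHEL and CentOS based systems
   # curl -s http://controller:5000/v3   # should return a small JSON version document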
|
|
||||||
Resource Alerting
|
|
||||||
~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Resource alerting provides notifications when one or more resources are
|
|
||||||
critically low. While the monitoring thresholds should be tuned to your
|
|
||||||
specific OpenStack environment, monitoring resource usage is not
|
|
||||||
specific to OpenStack at all—any generic type of alert will work
|
|
||||||
fine.
|
|
||||||
|
|
||||||
Some of the resources that you want to monitor include:
|
|
||||||
|
|
||||||
* Disk usage
|
|
||||||
* Server load
|
|
||||||
* Memory usage
|
|
||||||
* Network I/O
|
|
||||||
* Available vCPUs
|
|
||||||
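While a monitoring system should collect these continuously, a few ad hoc
commands are useful for spot checks; the last command assumes the
``python-openstackclient`` package is installed on a node with admin
credentials loaded:

.. code-block:: console

   # df -h                              # disk usage
   # uptime                             # server load
   # free -m                            # memory usage
   # ip -s link                         # network I/O counters
   # openstack hypervisor stats show    # cloud-wide vCPU, RAM, and disk usage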
|
|
||||||
Telemetry Service
|
|
||||||
~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The Telemetry service (:term:`ceilometer`) collects
|
|
||||||
metering and event data relating to OpenStack services. Data collected
|
|
||||||
by the Telemetry service could be used for billing. Depending on the
deployment configuration, collected data may also be accessible to
users. The Telemetry service provides a
|
|
||||||
REST API documented at `ceilometer V2 Web API
|
|
||||||
<https://docs.openstack.org/developer/ceilometer/webapi/v2.html>`_. You can
|
|
||||||
read more about the module in the `OpenStack Administrator
|
|
||||||
Guide <https://docs.openstack.org/admin-guide/telemetry.html>`_ or
|
|
||||||
in the `developer
|
|
||||||
documentation <https://docs.openstack.org/developer/ceilometer>`_.
|
|
||||||
|
|
||||||
OpenStack Specific Resources
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Resources such as memory, disk, and CPU are generic resources that all
|
|
||||||
servers (even non-OpenStack servers) have and are important to the
|
|
||||||
overall health of the server. When dealing with OpenStack specifically,
|
|
||||||
these resources are important for a second reason: ensuring that enough
|
|
||||||
are available to launch instances. There are a few ways you can see
|
|
||||||
OpenStack resource usage.
|
|
||||||
The first is through the :command:`openstack` command:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack usage list
|
|
||||||
|
|
||||||
This command displays a list of how many instances a tenant has running
and some light usage statistics about the combined instances. It is
useful for a quick overview of your cloud, but it does not provide much
detail.
|
|
||||||
|
|
||||||
Next, the ``nova`` database contains three tables that store usage
|
|
||||||
information.
|
|
||||||
|
|
||||||
The ``nova.quotas`` and ``nova.quota_usages`` tables store quota
|
|
||||||
information. If a tenant's quota is different from the default quota
|
|
||||||
settings, its quota is stored in the ``nova.quotas`` table. For example:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
mysql> select project_id, resource, hard_limit from quotas;
|
|
||||||
+----------------------------------+-----------------------------+------------+
|
|
||||||
| project_id | resource | hard_limit |
|
|
||||||
+----------------------------------+-----------------------------+------------+
|
|
||||||
| 628df59f091142399e0689a2696f5baa | metadata_items | 128 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | injected_file_content_bytes | 10240 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | injected_files | 5 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | gigabytes | 1000 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | ram | 51200 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | floating_ips | 10 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | instances | 10 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | volumes | 10 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | cores | 20 |
|
|
||||||
+----------------------------------+-----------------------------+------------+
|
|
||||||
|
|
||||||
The ``nova.quota_usages`` table keeps track of how many resources the
|
|
||||||
tenant currently has in use:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
mysql> select project_id, resource, in_use from quota_usages where project_id like '628%';
|
|
||||||
+----------------------------------+--------------+--------+
|
|
||||||
| project_id | resource | in_use |
|
|
||||||
+----------------------------------+--------------+--------+
|
|
||||||
| 628df59f091142399e0689a2696f5baa | instances | 1 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | ram | 512 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | cores | 1 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | floating_ips | 1 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | volumes | 2 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | gigabytes | 12 |
|
|
||||||
| 628df59f091142399e0689a2696f5baa | images | 1 |
|
|
||||||
+----------------------------------+--------------+--------+
|
|
||||||
|
|
||||||
By comparing a tenant's hard limit with their current resource usage,
|
|
||||||
you can see their usage percentage. For example, if this tenant is using
|
|
||||||
1 floating IP out of 10, then they are using 10 percent of their
|
|
||||||
floating IP quota. Rather than doing the calculation manually, you can
|
|
||||||
use SQL or the scripting language of your choice and create a formatted
|
|
||||||
report:
|
|
||||||
|
|
||||||
.. code-block:: mysql
|
|
||||||
|
|
||||||
+----------------------------------+------------+-------------+---------------+
|
|
||||||
| some_tenant |
|
|
||||||
+-----------------------------------+------------+------------+---------------+
|
|
||||||
| Resource | Used | Limit | |
|
|
||||||
+-----------------------------------+------------+------------+---------------+
|
|
||||||
| cores | 1 | 20 | 5 % |
|
|
||||||
| floating_ips | 1 | 10 | 10 % |
|
|
||||||
| gigabytes | 12 | 1000 | 1 % |
|
|
||||||
| images | 1 | 4 | 25 % |
|
|
||||||
| injected_file_content_bytes | 0 | 10240 | 0 % |
|
|
||||||
| injected_file_path_bytes | 0 | 255 | 0 % |
|
|
||||||
| injected_files | 0 | 5 | 0 % |
|
|
||||||
| instances | 1 | 10 | 10 % |
|
|
||||||
| key_pairs | 0 | 100 | 0 % |
|
|
||||||
| metadata_items | 0 | 128 | 0 % |
|
|
||||||
| ram | 512 | 51200 | 1 % |
|
|
||||||
| reservation_expire | 0 | 86400 | 0 % |
|
|
||||||
| security_group_rules | 0 | 20 | 0 % |
|
|
||||||
| security_groups | 0 | 10 | 0 % |
|
|
||||||
| volumes | 2 | 10 | 20 % |
|
|
||||||
+-----------------------------------+------------+------------+---------------+
|
|
||||||
|
|
||||||
The preceding information was generated by using a custom script that
|
|
||||||
can be found on
|
|
||||||
`GitHub <https://github.com/cybera/novac/blob/dev/libexec/novac-quota-report>`_.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
This script is specific to a certain OpenStack installation and must
|
|
||||||
be modified to fit your environment. However, the logic should
|
|
||||||
easily be transferable.
|
|
||||||
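If you prefer to stay in SQL, a minimal sketch of the same idea joins the two
tables directly. Note that resources still at their default limits have no
row in ``quotas`` and therefore do not appear in this simple join:

.. code-block:: mysql

   mysql> SELECT u.resource, u.in_use, q.hard_limit,
       ->        ROUND(u.in_use / q.hard_limit * 100) AS percent_used
       ->   FROM quota_usages u
       ->   JOIN quotas q
       ->     ON q.project_id = u.project_id AND q.resource = u.resource
       ->  WHERE u.project_id LIKE '628%';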
|
|
||||||
Intelligent Alerting
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Intelligent alerting can be thought of as a form of continuous
|
|
||||||
integration for operations. For example, you can easily check to see
|
|
||||||
whether the Image service is up and running by ensuring that
|
|
||||||
the ``glance-api`` and ``glance-registry`` processes are running or by
|
|
||||||
seeing whether ``glance-api`` is responding on port 9292.
|
|
||||||
|
|
||||||
But how can you tell whether images are being successfully uploaded to
|
|
||||||
the Image service? Maybe the disk that Image service is storing the
|
|
||||||
images on is full or the S3 back end is down. You could naturally check
|
|
||||||
this by doing a quick image upload:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
#!/bin/bash
|
|
||||||
#
|
|
||||||
# assumes that reasonable credentials have been stored at
|
|
||||||
# /root/openrc
|
|
||||||
|
|
||||||
|
|
||||||
. /root/openrc
|
|
||||||
wget http://download.cirros-cloud.net/0.3.5/cirros-0.3.5-x86_64-disk.img
|
|
||||||
openstack image create --name='cirros image' --public \
|
|
||||||
--container-format=bare --disk-format=qcow2 \
|
|
||||||
--file cirros-0.3.5-x86_64-disk.img
|
|
||||||
|
|
||||||
By taking this script and rolling it into an alert for your monitoring
|
|
||||||
system (such as Nagios), you now have an automated way of ensuring that
|
|
||||||
image uploads to the Image Catalog are working.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
You must remove the image after each test. Even better, test whether
|
|
||||||
you can successfully delete an image from the Image service.
|
|
||||||
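A hypothetical cleanup step along the following lines could be appended to
the check; the image name and file name simply mirror the upload example
above:

.. code-block:: bash

   # remove the test image and the downloaded file once the check has passed
   openstack image delete 'cirros image'
   rm -f cirros-0.3.5-x86_64-disk.img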
|
|
||||||
Intelligent alerting takes considerably more time to plan and implement
|
|
||||||
than the other alerts described in this chapter. A good outline to
|
|
||||||
implement intelligent alerting is:
|
|
||||||
|
|
||||||
- Review common actions in your cloud.
|
|
||||||
|
|
||||||
- Create ways to automatically test these actions.
|
|
||||||
|
|
||||||
- Roll these tests into an alerting system.
|
|
||||||
|
|
||||||
Some other examples of intelligent alerting include:
|
|
||||||
|
|
||||||
- Can instances launch and be destroyed?
|
|
||||||
|
|
||||||
- Can users be created?
|
|
||||||
|
|
||||||
- Can objects be stored and deleted?
|
|
||||||
|
|
||||||
- Can volumes be created and destroyed?
|
|
||||||
|
|
||||||
Trending
|
|
||||||
~~~~~~~~
|
|
||||||
|
|
||||||
Trending can give you great insight into how your cloud is performing
|
|
||||||
day to day. You can learn, for example, if a busy day was simply a rare
|
|
||||||
occurrence or if you should start adding new compute nodes.
|
|
||||||
|
|
||||||
Trending takes a slightly different approach than alerting. While
|
|
||||||
alerting is interested in a binary result (whether a check succeeds or
|
|
||||||
fails), trending records the current state of something at a certain
|
|
||||||
point in time. Once enough points in time have been recorded, you can
|
|
||||||
see how the value has changed over time.
|
|
||||||
|
|
||||||
All of the alert types mentioned earlier can also be used for trend
|
|
||||||
reporting. Some other trend examples include:
|
|
||||||
|
|
||||||
* The number of instances on each compute node
|
|
||||||
* The types of flavors in use
|
|
||||||
* The number of volumes in use
|
|
||||||
* The number of Object Storage requests each hour
|
|
||||||
* The number of ``nova-api`` requests each hour
|
|
||||||
* The I/O statistics of your storage services
|
|
||||||
|
|
||||||
As an example, recording ``nova-api`` usage can allow you to track the
|
|
||||||
need to scale your cloud controller. By keeping an eye on ``nova-api``
|
|
||||||
requests, you can determine whether you need to spawn more ``nova-api``
|
|
||||||
processes or go as far as introducing an entirely new server to run
|
|
||||||
``nova-api``. To get an approximate count of the requests, look for
|
|
||||||
standard INFO messages in ``/var/log/nova/nova-api.log``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# grep INFO /var/log/nova/nova-api.log | wc
|
|
||||||
|
|
||||||
You can obtain further statistics by looking for the number of
|
|
||||||
successful requests:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# grep " 200 " /var/log/nova/nova-api.log | wc
|
|
||||||
|
|
||||||
By running this command periodically and keeping a record of the result,
|
|
||||||
you can create a trending report over time that shows whether your
|
|
||||||
``nova-api`` usage is increasing, decreasing, or keeping steady.
|
|
||||||
|
|
||||||
A tool such as **collectd** can be used to store this information. While
|
|
||||||
collectd is out of the scope of this book, a good starting point would
|
|
||||||
be to use collectd to store the result as a COUNTER data type. More
|
|
||||||
information can be found in `collectd's
|
|
||||||
documentation <https://collectd.org/wiki/index.php/Data_source>`_.
|
|
||||||
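As an illustration of the idea, and not a recommended production setup, a
script run by the collectd ``exec`` plugin could emit the cumulative count of
``nova-api`` INFO lines as a COUNTER value. The plugin and type-instance
names used here are arbitrary:

.. code-block:: bash

   #!/bin/bash
   # Hypothetical collectd exec plugin script. It reports the cumulative number
   # of INFO lines in the nova-api log; log rotation resets the count, which
   # collectd handles as a counter wrap.
   HOST="${COLLECTD_HOSTNAME:-$(hostname -f)}"
   INTERVAL="${COLLECTD_INTERVAL:-60}"
   while sleep "$INTERVAL"; do
       count=$(grep -c INFO /var/log/nova/nova-api.log)
       echo "PUTVAL \"$HOST/nova-api/counter-requests\" interval=$INTERVAL N:$count"
   done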
|
|
||||||
|
|
||||||
Monitoring Tools
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Nagios
|
|
||||||
------
|
|
||||||
|
|
||||||
|
|
||||||
Nagios is an open source monitoring service. It is capable of executing
|
|
||||||
arbitrary commands to check the status of server and network services,
|
|
||||||
remotely executing arbitrary commands directly on servers, and allowing
|
|
||||||
servers to push notifications back in the form of passive monitoring.
|
|
||||||
Nagios has been around since 1999. Although newer monitoring services
|
|
||||||
are available, Nagios is a tried-and-true systems administration
|
|
||||||
staple.
|
|
||||||
|
|
||||||
You can create automated alerts for critical processes by using Nagios
|
|
||||||
and NRPE. For example, to ensure that the ``nova-compute`` process is
|
|
||||||
running on the compute nodes, create an alert on your Nagios server:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
define service {
|
|
||||||
host_name c01.example.com
|
|
||||||
check_command check_nrpe_1arg!check_nova-compute
|
|
||||||
use generic-service
|
|
||||||
notification_period 24x7
|
|
||||||
contact_groups sysadmins
|
|
||||||
service_description nova-compute
|
|
||||||
}
|
|
||||||
|
|
||||||
On the Compute node, create the following NRPE
|
|
||||||
configuration:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
command[check_nova-compute]=/usr/lib/nagios/plugins/check_procs -c 1: -a nova-compute
|
|
||||||
|
|
||||||
Nagios checks that at least one ``nova-compute`` service is running at
|
|
||||||
all times.
|
|
||||||
|
|
||||||
For resource alerting, for example, to monitor disk capacity on a compute
node with Nagios, add the following to your Nagios configuration:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
define service {
|
|
||||||
host_name c01.example.com
|
|
||||||
check_command check_nrpe!check_all_disks!20% 10%
|
|
||||||
use generic-service
|
|
||||||
contact_groups sysadmins
|
|
||||||
service_description Disk
|
|
||||||
}
|
|
||||||
|
|
||||||
On the compute node, add the following to your NRPE configuration:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
command[check_all_disks]=/usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -e
|
|
||||||
|
|
||||||
Nagios alerts you with a `WARNING` when any disk on the compute node is 80
|
|
||||||
percent full and `CRITICAL` when 90 percent is full.
|
|
||||||
|
|
||||||
StackTach
|
|
||||||
---------
|
|
||||||
|
|
||||||
StackTach is a tool that collects and reports the notifications sent by
|
|
||||||
nova. Notifications are essentially the same as logs but can be much
|
|
||||||
more detailed. Nearly all OpenStack components are capable of generating
|
|
||||||
notifications when significant events occur. Notifications are messages
|
|
||||||
placed on the OpenStack queue (generally RabbitMQ) for consumption by
|
|
||||||
downstream systems. An overview of notifications can be found at `System
|
|
||||||
Usage
|
|
||||||
Data <https://wiki.openstack.org/wiki/SystemUsageData>`_.
|
|
||||||
|
|
||||||
To enable nova to send notifications, add the following to the
|
|
||||||
``nova.conf`` configuration file:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
notification_topics=monitor
|
|
||||||
notification_driver=messagingv2
|
|
||||||
|
|
||||||
Once nova is sending notifications, install and configure StackTach.
|
|
||||||
StackTach workers for queue consumption and pipeline processing are
configured to read these notifications from RabbitMQ servers and store
them in a database. Users can inquire on instances, requests, and servers
|
|
||||||
by using the browser interface or command-line tool,
|
|
||||||
`Stacky <https://github.com/rackerlabs/stacky>`_. Since StackTach is
|
|
||||||
relatively new and constantly changing, installation instructions
|
|
||||||
quickly become outdated. Refer to the `StackTach Git
|
|
||||||
repository <https://git.openstack.org/cgit/openstack/stacktach>`_ for
|
|
||||||
instructions as well as a demonstration video. Additional details on the latest
|
|
||||||
developments can be discovered at the `official
|
|
||||||
page <http://stacktach.com/>`_.
|
|
||||||
|
|
||||||
Logstash
|
|
||||||
~~~~~~~~
|
|
||||||
|
|
||||||
Logstash is a high performance indexing and search engine for logs. Logs
|
|
||||||
from Jenkins test runs are sent to logstash where they are indexed and
|
|
||||||
stored. Logstash facilitates reviewing logs from multiple sources in a
|
|
||||||
single test run, searching for errors or particular events within a test
|
|
||||||
run, and searching for log event trends across test runs.
|
|
||||||
|
|
||||||
There are four major layers in a Logstash setup:
|
|
||||||
|
|
||||||
* Log Pusher
|
|
||||||
* Log Indexer
|
|
||||||
* ElasticSearch
|
|
||||||
* Kibana
|
|
||||||
|
|
||||||
Each layer scales horizontally. As the number of logs grows you can add
|
|
||||||
more log pushers, more Logstash indexers, and more ElasticSearch nodes.
|
|
||||||
|
|
||||||
Logpusher is a pair of Python scripts that first listen to Jenkins
build events and then convert them into Gearman jobs. Gearman provides a
generic application framework to farm out work to other machines or
processes that are better suited to do the work. It allows you to do
work in parallel, to load balance processing, and to call functions
between languages. Later, Logpusher performs Gearman jobs to push log
files into Logstash. The Logstash indexer reads these log events, filters
them to remove unwanted lines, collapses multiple events together, and
parses useful information before shipping them to ElasticSearch for
storage and indexing. Kibana is a Logstash-oriented web client for
ElasticSearch.
|
|
@ -1,252 +0,0 @@
|
|||||||
=================================================
|
|
||||||
Planning for deploying and provisioning OpenStack
|
|
||||||
=================================================
|
|
||||||
|
|
||||||
The decisions you make with respect to provisioning and deployment will
|
|
||||||
affect your maintenance of the cloud. Your configuration management will be
|
|
||||||
able to evolve over time. However, more upfront thought and design are
needed for choices about deployment, disk partitioning, and network
configuration.
|
|
||||||
|
|
||||||
A critical part of a cloud's scalability is the amount of effort that it
|
|
||||||
takes to run your cloud. To minimize the operational cost of running
|
|
||||||
your cloud, set up and use an automated deployment and configuration
|
|
||||||
infrastructure with a configuration management system, such as :term:`Puppet`
|
|
||||||
or :term:`Chef`. Combined, these systems greatly reduce manual effort and the
|
|
||||||
chance for operator error.
|
|
||||||
|
|
||||||
This infrastructure includes systems to automatically install the
|
|
||||||
operating system's initial configuration and later coordinate the
|
|
||||||
configuration of all services automatically and centrally, which reduces
|
|
||||||
both manual effort and the chance for error. Examples include Ansible,
|
|
||||||
CFEngine, Chef, Puppet, and Salt. You can even use OpenStack to deploy
|
|
||||||
OpenStack, named TripleO (OpenStack On OpenStack).
|
|
||||||
|
|
||||||
Automated deployment
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
An automated deployment system installs and configures operating systems
|
|
||||||
on new servers, without intervention, after the absolute minimum amount
|
|
||||||
of manual work, including physical racking, MAC-to-IP assignment, and
|
|
||||||
power configuration. Typically, solutions rely on wrappers around PXE
|
|
||||||
boot and TFTP servers for the basic operating system install and then
|
|
||||||
hand off to an automated configuration management system.
|
|
||||||
|
|
||||||
Both Ubuntu and Red Hat Enterprise Linux include mechanisms for
|
|
||||||
configuring the operating system, including preseed and kickstart, that
|
|
||||||
you can use after a network boot. Typically, these are used to bootstrap
|
|
||||||
an automated configuration system. Alternatively, you can use an
|
|
||||||
image-based approach for deploying the operating system, such as
|
|
||||||
systemimager. You can use both approaches with a virtualized
|
|
||||||
infrastructure, such as when you run VMs to separate your control
|
|
||||||
services and physical infrastructure.
|
|
||||||
|
|
||||||
When you create a deployment plan, focus on a few vital areas because
|
|
||||||
they are very hard to modify post deployment. The next two sections talk
|
|
||||||
about configurations for:
|
|
||||||
|
|
||||||
- Disk partitioning and disk array setup for scalability
|
|
||||||
|
|
||||||
- Networking configuration just for PXE booting
|
|
||||||
|
|
||||||
Disk partitioning and RAID
|
|
||||||
--------------------------
|
|
||||||
|
|
||||||
At the very base of any operating system are the hard drives on which
|
|
||||||
the operating system (OS) is installed.
|
|
||||||
|
|
||||||
You must complete the following configurations on the server's hard
|
|
||||||
drives:
|
|
||||||
|
|
||||||
- Partitioning, which provides greater flexibility for layout of
|
|
||||||
operating system and swap space, as described below.
|
|
||||||
|
|
||||||
- Adding to a RAID array (RAID stands for redundant array of
|
|
||||||
independent disks), based on the number of disks you have available,
|
|
||||||
so that you can add capacity as your cloud grows. Some options are
|
|
||||||
described in more detail below.
|
|
||||||
|
|
||||||
The simplest option to get started is to use one hard drive with two
|
|
||||||
partitions:
|
|
||||||
|
|
||||||
- File system to store files and directories, where all the data lives,
|
|
||||||
including the root partition that starts and runs the system.
|
|
||||||
|
|
||||||
- Swap space to free up memory for processes, as an independent area of
|
|
||||||
the physical disk used only for swapping and nothing else.
|
|
||||||
|
|
||||||
RAID is not used in this simplistic one-drive setup because generally
|
|
||||||
for production clouds, you want to ensure that if one disk fails,
|
|
||||||
another can take its place. Instead, for production, use more than one
|
|
||||||
disk. The number of disks determines what types of RAID arrays to build.
|
|
||||||
|
|
||||||
We recommend that you choose one of the following multiple disk options:
|
|
||||||
|
|
||||||
Option 1
|
|
||||||
Partition all drives in the same way in a horizontal fashion, as
|
|
||||||
shown in :ref:`partition_setup`.
|
|
||||||
|
|
||||||
With this option, you can assign different partitions to different
|
|
||||||
RAID arrays. You can allocate partition 1 of disk one and two to the
|
|
||||||
``/boot`` partition mirror. You can make partition 2 of all disks
|
|
||||||
the root partition mirror. You can use partition 3 of all disks for
|
|
||||||
a ``cinder-volumes`` LVM partition running on a RAID 10 array.
|
|
||||||
|
|
||||||
.. _partition_setup:
|
|
||||||
|
|
||||||
.. figure:: figures/osog_0201.png
|
|
||||||
|
|
||||||
Partition setup of drives
|
|
||||||
|
|
||||||
While you might end up with unused partitions, such as partition 1
|
|
||||||
in disk three and four of this example, this option allows for
|
|
||||||
maximum utilization of disk space. I/O performance might be an issue
|
|
||||||
as a result of all disks being used for all tasks.
|
|
||||||
|
|
||||||
Option 2
|
|
||||||
Add all raw disks to one large RAID array, either hardware or
|
|
||||||
software based. You can partition this large array with the boot,
|
|
||||||
root, swap, and LVM areas. This option is simple to implement and
|
|
||||||
uses all partitions. However, disk I/O might suffer.
|
|
||||||
|
|
||||||
Option 3
|
|
||||||
Dedicate entire disks to certain partitions. For example, you could
|
|
||||||
allocate disk one and two entirely to the boot, root, and swap
|
|
||||||
partitions under a RAID 1 mirror. Then, allocate disk three and four
|
|
||||||
entirely to the LVM partition, also under a RAID 1 mirror. Disk I/O
|
|
||||||
should be better because I/O is focused on dedicated tasks. However,
|
|
||||||
the LVM partition is much smaller.
|
|
||||||
|
|
||||||
.. tip::
|
|
||||||
|
|
||||||
You may find that you can automate the partitioning itself. For
|
|
||||||
example, MIT uses `Fully Automatic Installation
|
|
||||||
(FAI) <http://fai-project.org/>`_ to do the initial PXE-based
|
|
||||||
partition and then install using a combination of min/max and
|
|
||||||
percentage-based partitioning.
|
|
||||||
|
|
||||||
As with most architecture choices, the right answer depends on your
|
|
||||||
environment. If you are using existing hardware, you know the disk
|
|
||||||
density of your servers and can determine some decisions based on the
|
|
||||||
options above. If you are going through a procurement process, your
|
|
||||||
users' requirements also help you determine hardware purchases. Here are
|
|
||||||
some examples from a private cloud providing web developers custom
|
|
||||||
environments at AT&T. This example is from a specific deployment, so
|
|
||||||
your existing hardware or procurement opportunity may vary from this.
|
|
||||||
AT&T uses three types of hardware in its deployment:
|
|
||||||
|
|
||||||
- Hardware for controller nodes, used for all stateless OpenStack API
|
|
||||||
services. About 32–64 GB memory, small attached disk, one processor,
|
|
||||||
varied number of cores, such as 6–12.
|
|
||||||
|
|
||||||
- Hardware for compute nodes. Typically 256 or 144 GB memory, two
|
|
||||||
processors, 24 cores. 4–6 TB direct attached storage, typically in a
|
|
||||||
RAID 5 configuration.
|
|
||||||
|
|
||||||
- Hardware for storage nodes. Typically for these, the disk space is
|
|
||||||
optimized for the lowest cost per GB of storage while maintaining
|
|
||||||
rack-space efficiency.
|
|
||||||
|
|
||||||
Again, the right answer depends on your environment. You have to make
|
|
||||||
your decision based on the trade-offs between space utilization,
|
|
||||||
simplicity, and I/O performance.
|
|
||||||
|
|
||||||
Network configuration
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
.. TODO Reference to networking sections in the following paragraph.
|
|
||||||
|
|
||||||
Network configuration is a very large topic that spans multiple areas of
|
|
||||||
this book. For now, make sure that your servers can PXE boot and
|
|
||||||
successfully communicate with the deployment server.
|
|
||||||
|
|
||||||
For example, you usually cannot configure NICs for VLANs when PXE
|
|
||||||
booting. Additionally, you usually cannot PXE boot with bonded NICs. If
|
|
||||||
you run into this scenario, consider using a simple 1 GB switch in a
|
|
||||||
private network on which only your cloud communicates.
|
|
||||||
|
|
||||||
Automated configuration
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The purpose of automatic configuration management is to establish and
|
|
||||||
maintain the consistency of a system without using human intervention.
|
|
||||||
You want to maintain consistency in your deployments so that you can
|
|
||||||
have the same cloud every time, repeatably. Proper use of automatic
|
|
||||||
configuration-management tools ensures that components of the cloud
|
|
||||||
systems are in particular states, in addition to simplifying deployment,
|
|
||||||
and configuration change propagation.
|
|
||||||
|
|
||||||
These tools also make it possible to test and roll back changes, as they
|
|
||||||
are fully repeatable. Conveniently, a large body of work has been done
|
|
||||||
by the OpenStack community in this space. Puppet, a configuration
|
|
||||||
management tool, even provides official modules for OpenStack projects
|
|
||||||
in an OpenStack infrastructure system known as `Puppet
|
|
||||||
OpenStack <https://wiki.openstack.org/wiki/Puppet>`_. Chef
|
|
||||||
configuration management is provided within
|
|
||||||
https://git.openstack.org/cgit/openstack/openstack-chef-repo. Additional
|
|
||||||
configuration management systems include Juju, Ansible, and Salt. Also,
|
|
||||||
PackStack is a command-line utility for Red Hat Enterprise Linux and
|
|
||||||
derivatives that uses Puppet modules to support rapid deployment of
|
|
||||||
OpenStack on existing servers over an SSH connection.
|
|
||||||
|
|
||||||
An integral part of a configuration-management system is the item that
|
|
||||||
it controls. You should carefully consider all of the items that you
|
|
||||||
want, or do not want, to be automatically managed. For example, you may
|
|
||||||
not want to automatically format hard drives with user data.
|
|
||||||
|
|
||||||
Remote management
|
|
||||||
~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
In our experience, most operators don't sit right next to the servers
|
|
||||||
running the cloud, and many don't necessarily enjoy visiting the data
|
|
||||||
center. OpenStack should be entirely remotely configurable, but
|
|
||||||
sometimes not everything goes according to plan.
|
|
||||||
|
|
||||||
In this instance, having out-of-band access to nodes running
|
|
||||||
OpenStack components is a boon. The IPMI protocol is the de facto
|
|
||||||
standard here, and acquiring hardware that supports it is highly
|
|
||||||
recommended to achieve that lights-out data center aim.
|
|
||||||
|
|
||||||
In addition, consider remote power control as well. While IPMI usually
|
|
||||||
controls the server's power state, having remote access to the PDU that
|
|
||||||
the server is plugged into can really be useful for situations when
|
|
||||||
everything seems wedged.
|
|
||||||
|
|
||||||
Other considerations
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
.. TODO In the first paragraph, reference to use case sections.
|
|
||||||
|
|
||||||
You can save time by understanding the use cases for the cloud you want
|
|
||||||
to create. Use cases for OpenStack are varied. Some include object
|
|
||||||
storage only; others require preconfigured compute resources to speed
|
|
||||||
development-environment set up; and others need fast provisioning of
|
|
||||||
compute resources that are already secured per tenant with private
|
|
||||||
networks. Your users may have need for highly redundant servers to make
|
|
||||||
sure their legacy applications continue to run. Perhaps a goal would be
|
|
||||||
to architect these legacy applications so that they run on multiple
|
|
||||||
instances in a cloudy, fault-tolerant way, but not make it a goal to add
|
|
||||||
to those clusters over time. Your users may indicate that they need
|
|
||||||
scaling considerations because of heavy Windows server use.
|
|
||||||
|
|
||||||
You can save resources by looking at the best fit for the hardware you
|
|
||||||
have in place already. You might have some high-density storage hardware
|
|
||||||
available. You could format and repurpose those servers for OpenStack
|
|
||||||
Object Storage. All of these considerations and input from users help
|
|
||||||
you build your use case and your deployment plan.
|
|
||||||
|
|
||||||
.. tip::
|
|
||||||
|
|
||||||
For further research about OpenStack deployment, investigate the
|
|
||||||
supported and documented preconfigured, prepackaged installers for
|
|
||||||
OpenStack from companies such as
|
|
||||||
`Canonical <https://www.ubuntu.com/cloud/openstack>`_,
|
|
||||||
`Cisco <http://www.cisco.com/c/en/us/solutions/data-center-virtualization/openstack-at-cisco/index.html>`_,
|
|
||||||
`Cloudscaling <http://www.cloudscaling.com>`_,
|
|
||||||
`IBM <http://www-03.ibm.com/software/products/en/ibm-cloud-orchestrator>`_,
|
|
||||||
`Metacloud <http://www.cisco.com/c/en/us/products/cloud-systems-management/metacloud/index.html>`_,
|
|
||||||
`Mirantis <https://www.mirantis.com>`_,
|
|
||||||
`Rackspace <https://www.rackspace.com/cloud/private>`_,
|
|
||||||
`Red Hat <https://www.redhat.com/openstack>`_,
|
|
||||||
`SUSE <https://www.suse.com/products/suse-openstack-cloud>`_,
|
|
||||||
and `SwiftStack <https://www.swiftstack.com>`_.
|
|
@ -1,11 +0,0 @@
|
|||||||
=======
|
|
||||||
Summary
|
|
||||||
=======
|
|
||||||
|
|
||||||
One key element of systems administration that is often overlooked is
|
|
||||||
that end users are the reason systems administrators exist. Don't go the
|
|
||||||
BOFH route and terminate every user who causes an alert to go off. Work
|
|
||||||
with users to understand what they're trying to accomplish and see how
|
|
||||||
your environment can better assist them in achieving their goals. Meet
|
|
||||||
your users' needs by organizing your users into projects, applying
|
|
||||||
policies, managing quotas, and working with them.
|
|
@ -1,33 +0,0 @@
|
|||||||
===========================
|
|
||||||
Managing Projects and Users
|
|
||||||
===========================
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
|
|
||||||
ops-projects.rst
|
|
||||||
ops-quotas.rst
|
|
||||||
ops-users.rst
|
|
||||||
ops-projects-users-summary.rst
|
|
||||||
|
|
||||||
An OpenStack cloud does not have much value without users. This chapter
|
|
||||||
covers topics that relate to managing users, projects, and quotas. This
|
|
||||||
chapter describes users and projects as defined by version 2 of the
OpenStack Identity API.
|
|
||||||
|
|
||||||
Projects or Tenants?
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
In OpenStack user interfaces and documentation, a group of users is
|
|
||||||
referred to as a :term:`project` or :term:`tenant`.
|
|
||||||
These terms are interchangeable.
|
|
||||||
|
|
||||||
The initial implementation of OpenStack Compute had its own
|
|
||||||
authentication system and used the term ``project``. When authentication
|
|
||||||
moved into the OpenStack Identity (keystone) project, it used the term
|
|
||||||
``tenant`` to refer to a group of users. Because of this legacy, some of
|
|
||||||
the OpenStack tools refer to projects and some refer to tenants.
|
|
||||||
|
|
||||||
.. tip::
|
|
||||||
|
|
||||||
This guide uses the term ``project``, unless an example shows
|
|
||||||
interaction with a tool that uses the term ``tenant``.
|
|
@ -1,44 +0,0 @@
|
|||||||
=================
|
|
||||||
Managing Projects
|
|
||||||
=================
|
|
||||||
|
|
||||||
Users must be associated with at least one project, though they may
|
|
||||||
belong to many. Therefore, you should add at least one project before
|
|
||||||
adding users.
|
|
||||||
|
|
||||||
Adding Projects
|
|
||||||
~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
To create a project through the OpenStack dashboard:
|
|
||||||
|
|
||||||
#. Log in as an administrative user.
|
|
||||||
|
|
||||||
#. Select the :guilabel:`Identity` tab in the left navigation bar.
|
|
||||||
|
|
||||||
#. Under the :guilabel:`Identity` tab, click :guilabel:`Projects`.
|
|
||||||
|
|
||||||
#. Click the :guilabel:`Create Project` button.
|
|
||||||
|
|
||||||
You are prompted for a project name and an optional, but recommended,
|
|
||||||
description. Select the check box at the bottom of the form to enable
|
|
||||||
this project. By default, it is enabled, as shown below:
|
|
||||||
|
|
||||||
.. figure:: figures/create_project.png
|
|
||||||
:alt: Create Project form
|
|
||||||
|
|
||||||
It is also possible to add project members and adjust the project
|
|
||||||
quotas. We'll discuss those actions later, but in practice, it can be
|
|
||||||
quite convenient to deal with all these operations at one time.
|
|
||||||
|
|
||||||
To add a project through the command line, you must use the OpenStack
|
|
||||||
command line client.
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# openstack project create demo --domain default
|
|
||||||
|
|
||||||
This command creates a project named ``demo``. Optionally, you can add a
|
|
||||||
description string by appending ``--description PROJECT_DESCRIPTION``,
|
|
||||||
which can be very useful. You can also
|
|
||||||
create a project in a disabled state by appending ``--disable`` to the
|
|
||||||
command. By default, projects are created in an enabled state.
|
|
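For example, the following command, using a hypothetical project name,
creates a disabled project with a description in one step:

.. code-block:: console

   # openstack project create demo02 --domain default \
     --description "Temporary sandbox project" --disable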
@ -1,451 +0,0 @@
|
|||||||
======
|
|
||||||
Quotas
|
|
||||||
======
|
|
||||||
|
|
||||||
To prevent system capacities from being exhausted without notification,
|
|
||||||
you can set up :term:`quotas <quota>`. Quotas are operational limits. For example,
|
|
||||||
the number of gigabytes allowed per tenant can be controlled to ensure that
|
|
||||||
a single tenant cannot consume all of the disk space. Quotas are
|
|
||||||
currently enforced at the tenant (or project) level, rather than the
|
|
||||||
user level.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Because without sensible quotas a single tenant could use up all the
|
|
||||||
available resources, default quotas are shipped with OpenStack. You
|
|
||||||
should pay attention to which quota settings make sense for your
|
|
||||||
hardware capabilities.
|
|
||||||
|
|
||||||
Using the command-line interface, you can manage quotas for the
|
|
||||||
OpenStack Compute service and the Block Storage service.
|
|
||||||
|
|
||||||
Typically, default values are changed because a tenant requires more
|
|
||||||
than the OpenStack default of 10 volumes per tenant, or more than the
|
|
||||||
OpenStack default of 1 TB of disk space on a compute node.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
To view all tenants, run:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ openstack project list
|
|
||||||
+---------------------------------+----------+
|
|
||||||
| ID | Name |
|
|
||||||
+---------------------------------+----------+
|
|
||||||
| a981642d22c94e159a4a6540f70f9f8 | admin |
|
|
||||||
| 934b662357674c7b9f5e4ec6ded4d0e | tenant01 |
|
|
||||||
| 7bc1dbfd7d284ec4a856ea1eb82dca8 | tenant02 |
|
|
||||||
| 9c554aaef7804ba49e1b21cbd97d218 | services |
|
|
||||||
+---------------------------------+----------+
|
|
||||||
|
|
||||||
Set Image Quotas
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
You can restrict a project's image storage by total number of bytes.
|
|
||||||
Currently, this quota is applied cloud-wide, so if you were to set an
|
|
||||||
Image quota limit of 5 GB, then all projects in your cloud will be able
|
|
||||||
to store only 5 GB of images and snapshots.
|
|
||||||
|
|
||||||
To enable this feature, edit the ``/etc/glance/glance-api.conf`` file,
|
|
||||||
and under the ``[DEFAULT]`` section, add:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
user_storage_quota = <bytes>
|
|
||||||
|
|
||||||
For example, to restrict a project's image storage to 5 GB, do this:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
user_storage_quota = 5368709120
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
There is a configuration option in ``/etc/glance/glance-api.conf`` that limits
|
|
||||||
the number of members allowed per image, called
|
|
||||||
``image_member_quota``, set to 128 by default. That setting is a
|
|
||||||
different quota from the storage quota.
|
|
||||||
|
|
||||||
Set Compute Service Quotas
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
As an administrative user, you can update the Compute service quotas for
|
|
||||||
an existing tenant, as well as update the quota defaults for a new
|
|
||||||
tenant. See :ref:`table_compute_quota`.
|
|
||||||
|
|
||||||
.. _table_compute_quota:
|
|
||||||
|
|
||||||
.. list-table:: Compute quota descriptions
|
|
||||||
:widths: 30 40 30
|
|
||||||
:header-rows: 1
|
|
||||||
|
|
||||||
* - Quota
|
|
||||||
- Description
|
|
||||||
- Property name
|
|
||||||
* - Fixed IPs
|
|
||||||
- Number of fixed IP addresses allowed per project.
|
|
||||||
This number must be equal to or greater than the number
|
|
||||||
of allowed instances.
|
|
||||||
- ``fixed-ips``
|
|
||||||
* - Floating IPs
|
|
||||||
- Number of floating IP addresses allowed per project.
|
|
||||||
- ``floating-ips``
|
|
||||||
* - Injected file content bytes
|
|
||||||
- Number of content bytes allowed per injected file.
|
|
||||||
- ``injected-file-content-bytes``
|
|
||||||
* - Injected file path bytes
|
|
||||||
- Number of bytes allowed per injected file path.
|
|
||||||
- ``injected-file-path-bytes``
|
|
||||||
* - Injected files
|
|
||||||
- Number of injected files allowed per project.
|
|
||||||
- ``injected-files``
|
|
||||||
* - Instances
|
|
||||||
- Number of instances allowed per project.
|
|
||||||
- ``instances``
|
|
||||||
* - Key pairs
|
|
||||||
- Number of key pairs allowed per user.
|
|
||||||
- ``key-pairs``
|
|
||||||
* - Metadata items
|
|
||||||
- Number of metadata items allowed per instance.
|
|
||||||
- ``metadata-items``
|
|
||||||
* - RAM
|
|
||||||
- Megabytes of instance RAM allowed per project.
|
|
||||||
- ``ram``
|
|
||||||
* - Security group rules
|
|
||||||
- Number of security group rules per project.
|
|
||||||
- ``security-group-rules``
|
|
||||||
* - Security groups
|
|
||||||
- Number of security groups per project.
|
|
||||||
- ``security-groups``
|
|
||||||
* - VCPUs
|
|
||||||
- Number of instance cores allowed per project.
|
|
||||||
- ``cores``
|
|
||||||
* - Server Groups
|
|
||||||
- Number of server groups per project.
|
|
||||||
- ``server_groups``
|
|
||||||
* - Server Group Members
|
|
||||||
- Number of servers per server group.
|
|
||||||
- ``server_group_members``
|
|
||||||
|
|
||||||
View and update compute quotas for a tenant (project)
|
|
||||||
-----------------------------------------------------
|
|
||||||
|
|
||||||
As an administrative user, you can use the :command:`nova quota-*`
|
|
||||||
commands, which are provided by the
|
|
||||||
``python-novaclient`` package, to view and update tenant quotas.
|
|
||||||
|
|
||||||
**To view and update default quota values**
|
|
||||||
|
|
||||||
#. List all default quotas for all tenants, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova quota-defaults
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova quota-defaults
|
|
||||||
+-----------------------------+-------+
|
|
||||||
| Quota | Limit |
|
|
||||||
+-----------------------------+-------+
|
|
||||||
| instances | 10 |
|
|
||||||
| cores | 20 |
|
|
||||||
| ram | 51200 |
|
|
||||||
| floating_ips | 10 |
|
|
||||||
| fixed_ips | -1 |
|
|
||||||
| metadata_items | 128 |
|
|
||||||
| injected_files | 5 |
|
|
||||||
| injected_file_content_bytes | 10240 |
|
|
||||||
| injected_file_path_bytes | 255 |
|
|
||||||
| key_pairs | 100 |
|
|
||||||
| security_groups | 10 |
|
|
||||||
| security_group_rules | 20 |
|
|
||||||
| server_groups | 10 |
|
|
||||||
| server_group_members | 10 |
|
|
||||||
+-----------------------------+-------+
|
|
||||||
|
|
||||||
#. Update a default value for a new tenant, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova quota-class-update default --<key> <value>
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova quota-class-update default --instances 15
|
|
||||||
|
|
||||||
**To view quota values for a tenant (project)**
|
|
||||||
|
|
||||||
#. Place the tenant ID in a variable:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ tenant=$(openstack project list | awk '/tenantName/ {print $2}')
|
|
||||||
|
|
||||||
#. List the currently set quota values for a tenant, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova quota-show --tenant $tenant
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova quota-show --tenant $tenant
|
|
||||||
+-----------------------------+-------+
|
|
||||||
| Quota | Limit |
|
|
||||||
+-----------------------------+-------+
|
|
||||||
| instances | 10 |
|
|
||||||
| cores | 20 |
|
|
||||||
| ram | 51200 |
|
|
||||||
| floating_ips | 10 |
|
|
||||||
| fixed_ips | -1 |
|
|
||||||
| metadata_items | 128 |
|
|
||||||
| injected_files | 5 |
|
|
||||||
| injected_file_content_bytes | 10240 |
|
|
||||||
| injected_file_path_bytes | 255 |
|
|
||||||
| key_pairs | 100 |
|
|
||||||
| security_groups | 10 |
|
|
||||||
| security_group_rules | 20 |
|
|
||||||
| server_groups | 10 |
|
|
||||||
| server_group_members | 10 |
|
|
||||||
+-----------------------------+-------+
|
|
||||||
|
|
||||||
**To update quota values for a tenant (project)**
|
|
||||||
|
|
||||||
#. Obtain the tenant ID, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ tenant=$(openstack project list | awk '/tenantName/ {print $2}')
|
|
||||||
|
|
||||||
#. Update a particular quota value, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# nova quota-update --quotaName quotaValue tenantID
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# nova quota-update --floating-ips 20 $tenant
|
|
||||||
# nova quota-show --tenant $tenant
|
|
||||||
+-----------------------------+-------+
|
|
||||||
| Quota | Limit |
|
|
||||||
+-----------------------------+-------+
|
|
||||||
| instances | 10 |
|
|
||||||
| cores | 20 |
|
|
||||||
| ram | 51200 |
|
|
||||||
| floating_ips | 20 |
|
|
||||||
| fixed_ips | -1 |
|
|
||||||
| metadata_items | 128 |
|
|
||||||
| injected_files | 5 |
|
|
||||||
| injected_file_content_bytes | 10240 |
|
|
||||||
| injected_file_path_bytes | 255 |
|
|
||||||
| key_pairs | 100 |
|
|
||||||
| security_groups | 10 |
|
|
||||||
| security_group_rules | 20 |
|
|
||||||
| server_groups | 10 |
|
|
||||||
| server_group_members | 10 |
|
|
||||||
+-----------------------------+-------+
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
To view a list of options for the ``nova quota-update`` command, run:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ nova help quota-update
|
|
||||||
|
|
||||||
Set Object Storage Quotas
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
There are currently two categories of quotas for Object Storage:
|
|
||||||
|
|
||||||
Container quotas
|
|
||||||
Limit the total size (in bytes) or number of objects that can be
|
|
||||||
stored in a single container.
|
|
||||||
|
|
||||||
Account quotas
|
|
||||||
Limit the total size (in bytes) that a user has available in the
|
|
||||||
Object Storage service.
|
|
||||||
|
|
||||||
To take advantage of either container quotas or account quotas, your
|
|
||||||
Object Storage proxy server must have ``container_quotas`` or
|
|
||||||
``account_quotas`` (or both) added to the ``[pipeline:main]`` pipeline.
|
|
||||||
Each quota type also requires its own section in the
|
|
||||||
``proxy-server.conf`` file:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
[pipeline:main]
|
|
||||||
pipeline = catch_errors [...] slo dlo account_quotas proxy-server
|
|
||||||
|
|
||||||
[filter:account_quotas]
|
|
||||||
use = egg:swift#account_quotas
|
|
||||||
|
|
||||||
[filter:container_quotas]
|
|
||||||
use = egg:swift#container_quotas
|
|
||||||
|
|
||||||
To view and update Object Storage quotas, use the :command:`swift` command
|
|
||||||
provided by the ``python-swiftclient`` package. Any user included in the
|
|
||||||
project can view the quotas placed on their project. To update Object
|
|
||||||
Storage quotas on a project, you must have the role of ResellerAdmin in
|
|
||||||
the project that the quota is being applied to.
|
|
||||||
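
For example, granting that role with the ``openstack`` client looks like the
following sketch (the project and user names are illustrative, and the
``ResellerAdmin`` role must already exist in your Identity service):

.. code-block:: console

   $ openstack role add --project myproject --user operator ResellerAdmin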
|
|
||||||
To view account quotas placed on a project:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift stat
|
|
||||||
Account: AUTH_b36ed2d326034beba0a9dd1fb19b70f9
|
|
||||||
Containers: 0
|
|
||||||
Objects: 0
|
|
||||||
Bytes: 0
|
|
||||||
Meta Quota-Bytes: 214748364800
|
|
||||||
X-Timestamp: 1351050521.29419
|
|
||||||
Content-Type: text/plain; charset=utf-8
|
|
||||||
Accept-Ranges: bytes
|
|
||||||
|
|
||||||
To apply or update account quotas on a project:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift post -m quota-bytes:<bytes>
|
|
||||||
|
|
||||||
For example, to place a 5 GB quota on an account:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift post -m quota-bytes:5368709120
|
|
||||||
|
|
||||||
To verify the quota, run the :command:`swift stat` command again:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ swift stat
|
|
||||||
Account: AUTH_b36ed2d326034beba0a9dd1fb19b70f9
|
|
||||||
Containers: 0
|
|
||||||
Objects: 0
|
|
||||||
Bytes: 0
|
|
||||||
Meta Quota-Bytes: 5368709120
|
|
||||||
X-Timestamp: 1351541410.38328
|
|
||||||
Content-Type: text/plain; charset=utf-8
|
|
||||||
Accept-Ranges: bytes
|
|
||||||
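
The container quotas described earlier work in much the same way, except that
the metadata is posted to a specific container rather than to the account. The
following is only a sketch; the container name ``mycontainer`` and the limits
are illustrative, and the ``container_quotas`` middleware must be enabled as
shown above:

.. code-block:: console

   $ swift post -m quota-bytes:5368709120 mycontainer
   $ swift post -m quota-count:1000 mycontainer

Running :command:`swift stat mycontainer` afterwards should show the new
quota values in the container metadata.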
|
|
||||||
Set Block Storage Quotas
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
As an administrative user, you can update the Block Storage service
|
|
||||||
quotas for a tenant, as well as update the quota defaults for a new
|
|
||||||
tenant. See :ref:`table_block_storage_quota`.
|
|
||||||
|
|
||||||
.. _table_block_storage_quota:
|
|
||||||
|
|
||||||
.. list-table:: Table: Block Storage quota descriptions
|
|
||||||
:widths: 50 50
|
|
||||||
:header-rows: 1
|
|
||||||
|
|
||||||
* - Property name
|
|
||||||
- Description
|
|
||||||
* - gigabytes
|
|
||||||
- Number of volume gigabytes allowed per tenant
|
|
||||||
* - snapshots
|
|
||||||
- Number of Block Storage snapshots allowed per tenant.
|
|
||||||
* - volumes
|
|
||||||
- Number of Block Storage volumes allowed per tenant
|
|
||||||
|
|
||||||
View and update Block Storage quotas for a tenant (project)
|
|
||||||
-----------------------------------------------------------
|
|
||||||
|
|
||||||
As an administrative user, you can use the :command:`cinder quota-*`
|
|
||||||
commands, which are provided by the
|
|
||||||
``python-cinderclient`` package, to view and update tenant quotas.
|
|
||||||
|
|
||||||
**To view and update default Block Storage quota values**
|
|
||||||
|
|
||||||
#. List all default quotas for all tenants, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ cinder quota-defaults tenantID
|
|
||||||
|
|
||||||
#. Obtain the tenant ID, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ tenant=$(openstack project list | awk '/tenantName/ {print $2}')
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ cinder quota-defaults $tenant
|
|
||||||
+-----------+-------+
|
|
||||||
| Property | Value |
|
|
||||||
+-----------+-------+
|
|
||||||
| gigabytes | 1000 |
|
|
||||||
| snapshots | 10 |
|
|
||||||
| volumes | 10 |
|
|
||||||
+-----------+-------+
|
|
||||||
|
|
||||||
#. To update a default value for a new tenant, update the property in the
|
|
||||||
``/etc/cinder/cinder.conf`` file.
|
|
||||||
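
The defaults shown above correspond to options in the ``[DEFAULT]`` section of
``cinder.conf`` along the following lines (a sketch; the values are only
illustrative):

.. code-block:: ini

   [DEFAULT]
   # Default Block Storage quotas applied to each project
   quota_volumes = 10
   quota_snapshots = 10
   quota_gigabytes = 1000

Restart the Block Storage API service after changing these values so that the
new defaults take effect.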
|
|
||||||
**To view Block Storage quotas for a tenant (project)**
|
|
||||||
|
|
||||||
#. View quotas for the tenant, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# cinder quota-show tenantID
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# cinder quota-show $tenant
|
|
||||||
+-----------+-------+
|
|
||||||
| Property | Value |
|
|
||||||
+-----------+-------+
|
|
||||||
| gigabytes | 1000 |
|
|
||||||
| snapshots | 10 |
|
|
||||||
| volumes | 10 |
|
|
||||||
+-----------+-------+
|
|
||||||
|
|
||||||
**To update Block Storage quotas for a tenant (project)**
|
|
||||||
|
|
||||||
#. Place the tenant ID in a variable:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ tenant=$(openstack project list | awk '/tenantName/ {print $2}')
|
|
||||||
|
|
||||||
#. Update a particular quota value, as follows:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# cinder quota-update --quotaName NewValue tenantID
|
|
||||||
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# cinder quota-update --volumes 15 $tenant
|
|
||||||
# cinder quota-show $tenant
|
|
||||||
+-----------+-------+
|
|
||||||
| Property | Value |
|
|
||||||
+-----------+-------+
|
|
||||||
| gigabytes | 1000 |
|
|
||||||
| snapshots | 10 |
|
|
||||||
| volumes | 15 |
|
|
||||||
+-----------+-------+
|
|
@ -1,18 +0,0 @@
|
|||||||
============
|
|
||||||
Uninstalling
|
|
||||||
============
|
|
||||||
|
|
||||||
While we'd always recommend using your automated deployment system to
|
|
||||||
reinstall systems from scratch, sometimes you do need to remove
|
|
||||||
OpenStack from a system the hard way. Here's how:
|
|
||||||
|
|
||||||
* Remove all packages.
|
|
||||||
* Remove remaining files.
|
|
||||||
* Remove databases.
|
|
||||||
|
|
||||||
These steps depend on your underlying distribution, but in general you
|
|
||||||
should be looking for :command:`purge` commands in your package manager, like
|
|
||||||
:command:`aptitude purge ~c $package`. Following this, you can look for
|
|
||||||
orphaned files in the directories referenced throughout this guide. To
|
|
||||||
uninstall the database properly, refer to the manual appropriate for the
|
|
||||||
product in use.
|
|
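
As a minimal sketch of these steps on an Ubuntu-based node (the package
patterns, directories, and database name below are only examples and match
more than the core OpenStack packages; adapt them to your deployment):

.. code-block:: console

   # aptitude purge ~nnova ~nglance ~nkeystone ~nneutron ~ncinder
   # rm -rf /etc/nova /var/lib/nova /var/log/nova
   # mysql -u root -p -e "DROP DATABASE nova;"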
@ -1,553 +0,0 @@
|
|||||||
========
|
|
||||||
Upgrades
|
|
||||||
========
|
|
||||||
|
|
||||||
With the exception of Object Storage, upgrading from one version of
|
|
||||||
OpenStack to another can take a great deal of effort. This chapter
|
|
||||||
provides some guidance on the operational aspects that you should
|
|
||||||
consider for performing an upgrade for an OpenStack environment.
|
|
||||||
|
|
||||||
Pre-upgrade considerations
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Upgrade planning
|
|
||||||
----------------
|
|
||||||
|
|
||||||
- Thoroughly review the `release
|
|
||||||
notes <https://releases.openstack.org/>`_ to learn
|
|
||||||
about new, updated, and deprecated features. Find incompatibilities
|
|
||||||
between versions.
|
|
||||||
|
|
||||||
- Consider the impact of an upgrade to users. The upgrade process
|
|
||||||
interrupts management of your environment including the dashboard. If
|
|
||||||
you properly prepare for the upgrade, existing instances, networking,
|
|
||||||
and storage should continue to operate. However, instances might
|
|
||||||
experience intermittent network interruptions.
|
|
||||||
|
|
||||||
- Consider the approach to upgrading your environment. You can perform
|
|
||||||
an upgrade with operational instances, but this is a dangerous
|
|
||||||
approach. You might consider using live migration to temporarily
|
|
||||||
relocate instances to other compute nodes while performing upgrades.
|
|
||||||
However, you must ensure database consistency throughout the process;
|
|
||||||
otherwise your environment might become unstable. Also, don't forget
|
|
||||||
to provide sufficient notice to your users, including giving them
|
|
||||||
plenty of time to perform their own backups.
|
|
||||||
|
|
||||||
- Consider adopting structure and options from the service
|
|
||||||
configuration files and merging them with existing configuration
|
|
||||||
files. The `OpenStack Configuration
|
|
||||||
Reference <https://docs.openstack.org/ocata/config-reference/>`_
|
|
||||||
contains new, updated, and deprecated options for most services.
|
|
||||||
|
|
||||||
- Like all major system upgrades, your upgrade could fail for one or
|
|
||||||
more reasons. You can prepare for this situation by having the
|
|
||||||
ability to roll back your environment to the previous release,
|
|
||||||
including databases, configuration files, and packages. We provide an
|
|
||||||
example process for rolling back your environment in
|
|
||||||
:ref:`rolling_back_a_failed_upgrade`.
|
|
||||||
|
|
||||||
- Develop an upgrade procedure and assess it thoroughly by using a test
|
|
||||||
environment similar to your production environment.
|
|
||||||
|
|
||||||
Pre-upgrade testing environment
|
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
The most important step is the pre-upgrade testing. If you are upgrading
|
|
||||||
immediately after release of a new version, undiscovered bugs might
|
|
||||||
hinder your progress. Some deployers prefer to wait until the first
|
|
||||||
point release is announced. However, if you have a significant
|
|
||||||
deployment, you might follow the development and testing of the release
|
|
||||||
to ensure that bugs for your use cases are fixed.
|
|
||||||
|
|
||||||
Each OpenStack cloud is different even if you have a near-identical
|
|
||||||
architecture as described in this guide. As a result, you must still
|
|
||||||
test upgrades between versions in your environment using an approximate
|
|
||||||
clone of your environment.
|
|
||||||
|
|
||||||
However, that is not to say that it needs to be the same size or use
|
|
||||||
identical hardware as the production environment. It is important to
|
|
||||||
consider the hardware and scale of the cloud that you are upgrading. The
|
|
||||||
following tips can help you minimize the cost:
|
|
||||||
|
|
||||||
Use your own cloud
|
|
||||||
The simplest place to start testing the next version of OpenStack is
|
|
||||||
by setting up a new environment inside your own cloud. This might
|
|
||||||
seem odd, especially the double virtualization used in running
|
|
||||||
compute nodes. But it is a sure way to very quickly test your
|
|
||||||
configuration.
|
|
||||||
|
|
||||||
Use a public cloud
|
|
||||||
Consider using a public cloud to test the scalability limits of your
|
|
||||||
cloud controller configuration. Most public clouds bill by the hour,
|
|
||||||
which means it can be inexpensive to perform even a test with many
|
|
||||||
nodes.
|
|
||||||
|
|
||||||
Make another storage endpoint on the same system
|
|
||||||
If you use an external storage plug-in or shared file system with
|
|
||||||
your cloud, you can test whether it works by creating a second share
|
|
||||||
or endpoint. This allows you to test the system before entrusting
|
|
||||||
the new version on to your storage.
|
|
||||||
|
|
||||||
Watch the network
|
|
||||||
Even at smaller-scale testing, look for excess network packets to
|
|
||||||
determine whether something is going horribly wrong in
|
|
||||||
inter-component communication.
|
|
||||||
|
|
||||||
To set up the test environment, you can use one of several methods:
|
|
||||||
|
|
||||||
- Do a full manual install by using the `Installation Tutorials and Guides
|
|
||||||
<https://docs.openstack.org/project-install-guide/ocata/>`_ for
|
|
||||||
your platform. Review the final configuration files and installed
|
|
||||||
packages.
|
|
||||||
|
|
||||||
- Create a clone of your automated configuration infrastructure with
|
|
||||||
changed package repository URLs.
|
|
||||||
|
|
||||||
Alter the configuration until it works.
|
|
||||||
|
|
||||||
Either approach is valid. Use the approach that matches your experience.
|
|
||||||
|
|
||||||
An upgrade pre-testing system is excellent for getting the configuration
|
|
||||||
to work. However, it is important to note that the historical use of the
|
|
||||||
system and differences in user interaction can affect the success of
|
|
||||||
upgrades.
|
|
||||||
|
|
||||||
If possible, we highly recommend that you dump your production database
|
|
||||||
tables and test the upgrade in your development environment using this
|
|
||||||
data. Several MySQL bugs have been uncovered during database migrations
|
|
||||||
because of slight table differences between a fresh installation and
|
|
||||||
tables that migrated from one version to another. Such bugs can have a
|
|
||||||
significant impact on large, real datasets, and you do not want to encounter them during a
|
|
||||||
production outage.
|
|
||||||
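
A sketch of that approach, assuming a MySQL back end and illustrative host and
database names:

.. code-block:: console

   # mysqldump -u root -p --opt --add-drop-database \
     --databases nova glance keystone > prod-db-dump.sql
   $ scp prod-db-dump.sql test-environment:
   # mysql -u root -p < prod-db-dump.sql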
|
|
||||||
Artificial scale testing can go only so far. After your cloud is
|
|
||||||
upgraded, you must pay careful attention to the performance aspects of
|
|
||||||
your cloud.
|
|
||||||
|
|
||||||
Upgrade Levels
|
|
||||||
--------------
|
|
||||||
|
|
||||||
Upgrade levels are a feature added to OpenStack Compute in the
|
|
||||||
Grizzly release to provide version locking on the RPC (Message Queue)
|
|
||||||
communications between the various Compute services.
|
|
||||||
|
|
||||||
This functionality is an important piece of the puzzle when it comes to
|
|
||||||
live upgrades and is conceptually similar to the existing API versioning
|
|
||||||
that allows OpenStack services of different versions to communicate
|
|
||||||
without issue.
|
|
||||||
|
|
||||||
Without upgrade levels, an X+1 version Compute service can receive and
|
|
||||||
understand X version RPC messages, but it can only send out X+1 version
|
|
||||||
RPC messages. For example, if a nova-conductor process has been upgraded
|
|
||||||
to X+1 version, then the conductor service will be able to understand
|
|
||||||
messages from X version nova-compute processes, but those compute
|
|
||||||
services will not be able to understand messages sent by the conductor
|
|
||||||
service.
|
|
||||||
|
|
||||||
During an upgrade, operators can add configuration options to
|
|
||||||
``nova.conf`` which lock the version of RPC messages and allow live
|
|
||||||
upgrading of the services without interruption caused by version
|
|
||||||
mismatch. The configuration options allow the specification of RPC
|
|
||||||
version numbers if desired, but release name aliases are also supported.
|
|
||||||
For example:
|
|
||||||
|
|
||||||
.. code-block:: ini
|
|
||||||
|
|
||||||
[upgrade_levels]
|
|
||||||
compute=X+1
|
|
||||||
conductor=X+1
|
|
||||||
scheduler=X+1
|
|
||||||
|
|
||||||
will keep the RPC version locked across the specified services to the
|
|
||||||
RPC version used in X+1. As all instances of a particular service are
|
|
||||||
upgraded to the newer version, the corresponding line can be removed
|
|
||||||
from ``nova.conf``.
|
|
||||||
|
|
||||||
Using this functionality, ideally one would lock the RPC version to the
|
|
||||||
OpenStack version being upgraded from on nova-compute nodes, to ensure
|
|
||||||
that, for example, X+1 version nova-compute processes will continue to
|
|
||||||
work with X version nova-conductor processes while the upgrade
|
|
||||||
completes. Once the upgrade of nova-compute processes is complete, the
|
|
||||||
operator can move on to upgrading nova-conductor and remove the version
|
|
||||||
locking for nova-compute in ``nova.conf``.
|
|
||||||
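
For instance, to pin the compute RPC interface by release name while the
compute nodes are still being upgraded (``newton`` is used purely as an
illustration of a release name alias):

.. code-block:: ini

   [upgrade_levels]
   compute = newton

Once every nova-compute process runs the new code, remove the line and restart
the affected services so that they negotiate the latest RPC version again.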
|
|
||||||
Upgrade process
|
|
||||||
~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
This section describes the process to upgrade a basic OpenStack
|
|
||||||
deployment based on the basic two-node architecture in the `Installation
|
|
||||||
Tutorials and Guides
|
|
||||||
<https://docs.openstack.org/project-install-guide/ocata/>`_. All
|
|
||||||
nodes must run a supported distribution of Linux with a recent kernel
|
|
||||||
and the current release packages.
|
|
||||||
|
|
||||||
Service specific upgrade instructions
|
|
||||||
-------------------------------------
|
|
||||||
|
|
||||||
Refer to the following upgrade notes for information on upgrading specific
|
|
||||||
OpenStack services:
|
|
||||||
|
|
||||||
* `Networking service (neutron) upgrades
|
|
||||||
<https://docs.openstack.org/developer/neutron/devref/upgrade.html>`_
|
|
||||||
* `Compute service (nova) upgrades
|
|
||||||
<https://docs.openstack.org/developer/nova/upgrade.html>`_
|
|
||||||
* `Identity service (keystone) upgrades
|
|
||||||
<https://docs.openstack.org/developer/keystone/upgrading.html>`_
|
|
||||||
* `Block Storage service (cinder) upgrades
|
|
||||||
<https://docs.openstack.org/developer/cinder/upgrade.html>`_
|
|
||||||
* `Image service (glance) zero downtime database upgrades
|
|
||||||
<https://docs.openstack.org/developer/glance/db.html#zero-downtime-database-upgrades>`_
|
|
||||||
* `Image service (glance) rolling upgrades
|
|
||||||
<https://docs.openstack.org/developer/glance/rollingupgrades.html>`_
|
|
||||||
* `Bare Metal service (ironic) upgrades
|
|
||||||
<https://docs.openstack.org/developer/ironic/deploy/upgrade-guide.html>`_
|
|
||||||
* `Object Storage service (swift) upgrades
|
|
||||||
<https://docs.openstack.org/developer/swift/overview_policies.html#upgrade-policy>`_
|
|
||||||
* `Telemetry service (ceilometer) upgrades
|
|
||||||
<https://docs.openstack.org/developer/ceilometer/install/upgrade.html>`_
|
|
||||||
|
|
||||||
Prerequisites
|
|
||||||
-------------
|
|
||||||
|
|
||||||
- Perform some cleaning of the environment prior to starting the
|
|
||||||
upgrade process to ensure a consistent state. For example, instances
|
|
||||||
not fully purged from the system after deletion might cause
|
|
||||||
indeterminate behavior.
|
|
||||||
|
|
||||||
- For environments using the OpenStack Networking service (neutron),
|
|
||||||
verify the release version of the database. For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# su -s /bin/sh -c "neutron-db-manage --config-file /etc/neutron/neutron.conf \
|
|
||||||
--config-file /etc/neutron/plugins/ml2/ml2_conf.ini current" neutron
|
|
||||||
|
|
||||||
Perform a backup
|
|
||||||
----------------
|
|
||||||
|
|
||||||
#. Save the configuration files on all nodes. For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# for i in keystone glance nova neutron openstack-dashboard cinder heat ceilometer; \
|
|
||||||
do mkdir $i-RELEASE_NAME; \
|
|
||||||
done
|
|
||||||
# for i in keystone glance nova neutron openstack-dashboard cinder heat ceilometer; \
|
|
||||||
do cp -r /etc/$i/* $i-RELEASE_NAME/; \
|
|
||||||
done
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
You can modify this example script on each node to handle different
|
|
||||||
services.
|
|
||||||
|
|
||||||
#. Make a full database backup of your production data. Since the Kilo release,
|
|
||||||
database downgrades are not supported, and restoring from backup is the only
|
|
||||||
method available to retrieve a previous database version.
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mysqldump -u root -p --opt --add-drop-database --all-databases > RELEASE_NAME-db-backup.sql
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Consider updating your SQL server configuration as described in the
|
|
||||||
`Installation Tutorials and Guides
|
|
||||||
<https://docs.openstack.org/project-install-guide/ocata/>`_.
|
|
||||||
|
|
||||||
Manage repositories
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
On all nodes (see the Ubuntu sketch after this list):
|
|
||||||
|
|
||||||
#. Remove the repository for the previous release packages.
|
|
||||||
|
|
||||||
#. Add the repository for the new release packages.
|
|
||||||
|
|
||||||
#. Update the repository database.
|
|
||||||
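
On an Ubuntu-based deployment that uses the Ubuntu Cloud Archive, the last two
steps might look like the following sketch (the release name ``ocata`` is only
an example, and removing the old repository depends on how it was originally
added):

.. code-block:: console

   # add-apt-repository cloud-archive:ocata
   # apt-get update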
|
|
||||||
Upgrade packages on each node
|
|
||||||
-----------------------------
|
|
||||||
|
|
||||||
Depending on your specific configuration, upgrading all packages might
|
|
||||||
restart or break services supplemental to your OpenStack environment.
|
|
||||||
For example, if you use the TGT iSCSI framework for Block Storage
|
|
||||||
volumes and the upgrade includes new packages for it, the package
|
|
||||||
manager might restart the TGT iSCSI services and impact connectivity to
|
|
||||||
volumes.
|
|
||||||
|
|
||||||
If the package manager prompts you to update configuration files, reject
|
|
||||||
the changes. The package manager appends a suffix to newer versions of
|
|
||||||
configuration files. Consider reviewing and adopting content from these
|
|
||||||
files.
|
|
||||||
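
On an Ubuntu-based node, this step is usually a variation of the following
sketch; your deployment might use a different package manager or require
additional options:

.. code-block:: console

   # apt-get update
   # apt-get dist-upgrade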
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
You may need to explicitly install the ``ipset`` package if your
|
|
||||||
distribution does not install it as a dependency.
|
|
||||||
|
|
||||||
Update services
|
|
||||||
---------------
|
|
||||||
|
|
||||||
To update a service on each node, you generally modify one or more
|
|
||||||
configuration files, stop the service, synchronize the database schema,
|
|
||||||
and start the service. Some services require different steps. We
|
|
||||||
recommend verifying operation of each service before proceeding to the
|
|
||||||
next service.
|
|
||||||
|
|
||||||
The order in which you should upgrade services, and any changes to the general
|
|
||||||
upgrade process, are described below:
|
|
||||||
|
|
||||||
**Controller node**
|
|
||||||
|
|
||||||
#. Identity service - Clear any expired tokens before synchronizing
|
|
||||||
the database (see the sketch after this list).
|
|
||||||
|
|
||||||
#. Image service
|
|
||||||
|
|
||||||
#. Compute service, including networking components.
|
|
||||||
|
|
||||||
#. Networking service
|
|
||||||
|
|
||||||
#. Block Storage service
|
|
||||||
|
|
||||||
#. Dashboard - In typical environments, updating Dashboard only
|
|
||||||
requires restarting the Apache HTTP service.
|
|
||||||
|
|
||||||
#. Orchestration service
|
|
||||||
|
|
||||||
#. Telemetry service - In typical environments, updating the
|
|
||||||
Telemetry service only requires restarting the service.
|
|
||||||
|
|
||||||
#. Compute service - Edit the configuration file and restart the service.
|
|
||||||
|
|
||||||
#. Networking service - Edit the configuration file and restart the service.
|
|
||||||
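
As a sketch of the Identity service step above on an Ubuntu-based controller
(the ``apache2`` service name is an assumption and varies by distribution and
deployment method):

.. code-block:: console

   # keystone-manage token_flush
   # keystone-manage db_sync
   # service apache2 restart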
|
|
||||||
**Storage nodes**
|
|
||||||
|
|
||||||
* Block Storage service - Updating the Block Storage service only requires
|
|
||||||
restarting the service.
|
|
||||||
|
|
||||||
**Compute nodes**
|
|
||||||
|
|
||||||
* Networking service - Edit the configuration file and restart the service.
|
|
||||||
|
|
||||||
Final steps
|
|
||||||
-----------
|
|
||||||
|
|
||||||
On all distributions, you must perform some final tasks to complete the
|
|
||||||
upgrade process.
|
|
||||||
|
|
||||||
#. Decrease DHCP timeouts by modifying the :file:`/etc/nova/nova.conf` file on
|
|
||||||
the compute nodes back to the original value for your environment.
|
|
||||||
|
|
||||||
#. Update all ``.ini`` files to match passwords and pipelines as required
|
|
||||||
for the OpenStack release in your environment.
|
|
||||||
|
|
||||||
#. After migration, users see different results from
|
|
||||||
:command:`openstack image list` and :command:`glance image-list`. To ensure
|
|
||||||
users see the same images in the list commands, edit the
|
|
||||||
:file:`/etc/glance/policy.json` file and :file:`/etc/nova/policy.json` file
|
|
||||||
to contain ``"context_is_admin": "role:admin"``, which limits access to
|
|
||||||
private images for projects.
|
|
||||||
|
|
||||||
#. Verify proper operation of your environment. Then, notify your users
|
|
||||||
that their cloud is operating normally again.
|
|
||||||
|
|
||||||
.. _rolling_back_a_failed_upgrade:
|
|
||||||
|
|
||||||
Rolling back a failed upgrade
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
This section provides guidance for rolling back to a previous release of
|
|
||||||
OpenStack. All distributions follow a similar procedure.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Rolling back your environment should be the final course of action
|
|
||||||
since you are likely to lose any data added since the backup.
|
|
||||||
|
|
||||||
A common scenario is to take down production management services in
|
|
||||||
preparation for an upgrade, complete part of the upgrade process, and
|
|
||||||
then discover one or more problems not encountered during testing. As a
|
|
||||||
consequence, you must roll back your environment to the original "known
|
|
||||||
good" state. You also made sure that you did not make any state changes
|
|
||||||
after attempting the upgrade process; no new instances, networks,
|
|
||||||
storage volumes, and so on. Any of these new resources will be in a
|
|
||||||
frozen state after the databases are restored from backup.
|
|
||||||
|
|
||||||
Within this scope, you must complete these steps to successfully roll
|
|
||||||
back your environment:
|
|
||||||
|
|
||||||
#. Roll back configuration files.
|
|
||||||
|
|
||||||
#. Restore databases from backup.
|
|
||||||
|
|
||||||
#. Roll back packages.
|
|
||||||
|
|
||||||
You should verify that you have the requisite backups to restore.
|
|
||||||
Rolling back upgrades is a tricky process because distributions tend to
|
|
||||||
put much more effort into testing upgrades than downgrades. Broken
|
|
||||||
downgrades take significantly more effort to troubleshoot and resolve
|
|
||||||
than broken upgrades. Only you can weigh the risks of trying to push a
|
|
||||||
failed upgrade forward versus rolling it back. Generally, consider
|
|
||||||
rolling back as the very last option.
|
|
||||||
|
|
||||||
The following steps described for Ubuntu have worked on at least one
|
|
||||||
production environment, but they might not work for all environments.
|
|
||||||
|
|
||||||
**To perform a rollback**
|
|
||||||
|
|
||||||
#. Stop all OpenStack services.
|
|
||||||
|
|
||||||
#. Copy contents of configuration backup directories that you created
|
|
||||||
during the upgrade process back to ``/etc/<service>`` directory.
|
|
||||||
|
|
||||||
#. Restore databases from the ``RELEASE_NAME-db-backup.sql`` backup file
|
|
||||||
that you created with the :command:`mysqldump` command during the upgrade
|
|
||||||
process:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# mysql -u root -p < RELEASE_NAME-db-backup.sql
|
|
||||||
|
|
||||||
#. Downgrade OpenStack packages.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
Downgrading packages is by far the most complicated step; it is
|
|
||||||
highly dependent on the distribution and the overall administration
|
|
||||||
of the system.
|
|
||||||
|
|
||||||
#. Determine which OpenStack packages are installed on your system. Use the
|
|
||||||
:command:`dpkg --get-selections` command. Filter for OpenStack
|
|
||||||
packages, filter again to omit packages explicitly marked in the
|
|
||||||
``deinstall`` state, and save the final output to a file. For example,
|
|
||||||
the following command covers a controller node with keystone, glance,
|
|
||||||
nova, neutron, and cinder:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# dpkg --get-selections | grep -e keystone -e glance -e nova -e neutron \
|
|
||||||
-e cinder | grep -v deinstall | tee openstack-selections
|
|
||||||
cinder-api install
|
|
||||||
cinder-common install
|
|
||||||
cinder-scheduler install
|
|
||||||
cinder-volume install
|
|
||||||
glance install
|
|
||||||
glance-api install
|
|
||||||
glance-common install
|
|
||||||
glance-registry install
|
|
||||||
neutron-common install
|
|
||||||
neutron-dhcp-agent install
|
|
||||||
neutron-l3-agent install
|
|
||||||
neutron-lbaas-agent install
|
|
||||||
neutron-metadata-agent install
|
|
||||||
neutron-plugin-openvswitch install
|
|
||||||
neutron-plugin-openvswitch-agent install
|
|
||||||
neutron-server install
|
|
||||||
nova-api install
|
|
||||||
nova-common install
|
|
||||||
nova-conductor install
|
|
||||||
nova-consoleauth install
|
|
||||||
nova-novncproxy install
|
|
||||||
nova-objectstore install
|
|
||||||
nova-scheduler install
|
|
||||||
python-cinder install
|
|
||||||
python-cinderclient install
|
|
||||||
python-glance install
|
|
||||||
python-glanceclient install
|
|
||||||
python-keystone install
|
|
||||||
python-keystoneclient install
|
|
||||||
python-neutron install
|
|
||||||
python-neutronclient install
|
|
||||||
python-nova install
|
|
||||||
python-novaclient install
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Depending on the type of server, the contents and order of your
|
|
||||||
package list might vary from this example.
|
|
||||||
|
|
||||||
#. You can determine the package versions available for reversion by using
|
|
||||||
the ``apt-cache policy`` command. For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# apt-cache policy nova-common
|
|
||||||
|
|
||||||
nova-common:
|
|
||||||
Installed: 2:14.0.1-0ubuntu1~cloud0
|
|
||||||
Candidate: 2:14.0.1-0ubuntu1~cloud0
|
|
||||||
Version table:
|
|
||||||
*** 2:14.0.1-0ubuntu1~cloud0 500
|
|
||||||
500 http://ubuntu-cloud.archive.canonical.com/ubuntu xenial-updates/newton/main amd64 Packages
|
|
||||||
100 /var/lib/dpkg/status
|
|
||||||
2:13.1.2-0ubuntu2 500
|
|
||||||
500 http://archive.ubuntu.com/ubuntu xenial-updates/main amd64 Packages
|
|
||||||
2:13.0.0-0ubuntu2 500
|
|
||||||
500 http://archive.ubuntu.com/ubuntu xenial/main amd64 Packages
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
If you removed the release repositories, you must first reinstall
|
|
||||||
them and run the :command:`apt-get update` command.
|
|
||||||
|
|
||||||
The command output lists the currently installed version of the package,
|
|
||||||
newest candidate version, and all versions along with the repository that
|
|
||||||
contains each version. Look for the appropriate release
|
|
||||||
version, ``2:14.0.1-0ubuntu1~cloud0`` in this case. The process of
|
|
||||||
manually picking through this list of packages is rather tedious and
|
|
||||||
prone to errors. You should consider using a script to help
|
|
||||||
with this process. For example:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# for i in `cut -f 1 openstack-selections | sed 's/neutron/;'`;
|
|
||||||
do echo -n $i ;apt-cache policy $i | grep -B 1 RELEASE_NAME |
|
|
||||||
grep -v Packages | awk '{print "="$1}';done | tr '\n' ' ' |
|
|
||||||
tee openstack-RELEASE_NAME-versions
|
|
||||||
cinder-api=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
cinder-common=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
cinder-scheduler=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
cinder-volume=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
glance=2:13.0.0-0ubuntu1~cloud0
|
|
||||||
glance-api=2:13.0.0-0ubuntu1~cloud0 500
|
|
||||||
glance-common=2:13.0.0-0ubuntu1~cloud0 500
|
|
||||||
glance-registry=2:13.0.0-0ubuntu1~cloud0 500
|
|
||||||
neutron-common=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
neutron-dhcp-agent=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
neutron-l3-agent=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
neutron-lbaas-agent=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
neutron-metadata-agent=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
neutron-server=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
nova-api=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
nova-common=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
nova-conductor=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
nova-consoleauth=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
nova-novncproxy=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
nova-objectstore=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
nova-scheduler=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
python-cinder=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
python-cinderclient=1:1.9.0-0ubuntu1~cloud0
|
|
||||||
python-glance=2:13.0.0-0ubuntu1~cloud0
|
|
||||||
python-glanceclient=1:2.5.0-0ubuntu1~cloud0
|
|
||||||
python-neutron=2:9.0.0-0ubuntu1~cloud0
|
|
||||||
python-neutronclient=1:6.0.0-0ubuntu1~cloud0
|
|
||||||
python-nova=2:14.0.1-0ubuntu1~cloud0
|
|
||||||
python-novaclient=2:6.0.0-0ubuntu1~cloud0
|
|
||||||
python-openstackclient=3.2.0-0ubuntu2~cloud0
|
|
||||||
|
|
||||||
#. Use the :command:`apt-get install` command to install specific versions
|
|
||||||
of each package by specifying ``<package-name>=<version>``. The script in
|
|
||||||
the previous step conveniently created a list of ``package=version``
|
|
||||||
pairs for you:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# apt-get install `cat openstack-RELEASE_NAME-versions`
|
|
||||||
|
|
||||||
This step completes the rollback procedure. You should remove the
|
|
||||||
upgrade release repository and run :command:`apt-get update` to prevent
|
|
||||||
accidental upgrades until you solve whatever issue caused you to roll
|
|
||||||
back your environment.
|
|
@ -1,253 +0,0 @@
|
|||||||
===============
|
|
||||||
User Management
|
|
||||||
===============
|
|
||||||
|
|
||||||
The OpenStack Dashboard provides a graphical interface to manage users.
|
|
||||||
This section describes user management with the Dashboard.
|
|
||||||
|
|
||||||
You can also `manage projects, users, and roles
|
|
||||||
<https://docs.openstack.org/admin-guide/cli-manage-projects-users-and-roles.html>`_
|
|
||||||
from the command-line clients.
|
|
||||||
|
|
||||||
In addition, many sites write custom tools for local needs to enforce
|
|
||||||
local policies and provide levels of self-service to users that are not
|
|
||||||
currently available with packaged tools.
|
|
||||||
|
|
||||||
Creating New Users
|
|
||||||
~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
To create a user, you need the following information:
|
|
||||||
|
|
||||||
* Username
|
|
||||||
* Description
|
|
||||||
* Email address
|
|
||||||
* Password
|
|
||||||
* Primary project
|
|
||||||
* Role
|
|
||||||
* Enabled
|
|
||||||
|
|
||||||
Username and email address are self-explanatory, though your site may
|
|
||||||
have local conventions you should observe. The primary project is simply
|
|
||||||
the first project the user is associated with and must exist prior to
|
|
||||||
creating the user. Role is almost always going to be "member." Out of
|
|
||||||
the box, OpenStack comes with two roles defined:
|
|
||||||
|
|
||||||
member
|
|
||||||
A typical user
|
|
||||||
|
|
||||||
admin
|
|
||||||
An administrative super user, which has full permissions across all
|
|
||||||
projects and should be used with great care
|
|
||||||
|
|
||||||
It is possible to define other roles, but doing so is uncommon.
|
|
||||||
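
If you prefer the command-line clients mentioned above, the same information
maps onto a short sequence of commands. The following is only a sketch; the
user, project, and role names are illustrative, and the exact role name varies
between deployments:

.. code-block:: console

   $ openstack user create --email jdoe@example.com --password-prompt jdoe
   $ openstack role add --project demo --user jdoe member

The dashboard workflow for the same task is described next.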
|
|
||||||
Once you've gathered this information, creating the user in the
|
|
||||||
dashboard is just another web form similar to what we've seen before and
|
|
||||||
can be found by clicking the :guilabel:`Users` link in the
|
|
||||||
:guilabel:`Identity` navigation bar and then clicking the
|
|
||||||
:guilabel:`Create User` button at the top right.
|
|
||||||
|
|
||||||
Modifying users is also done from this :guilabel:`Users` page. If you have a
|
|
||||||
large number of users, this page can get quite crowded. The :guilabel:`Filter`
|
|
||||||
search box at the top of the page can be used to limit the users listing. A
|
|
||||||
form very similar to the user creation dialog can be pulled up by selecting
|
|
||||||
:guilabel:`Edit` from the actions drop-down menu at the end of the line for
|
|
||||||
the user you are modifying.
|
|
||||||
|
|
||||||
Associating Users with Projects
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Many sites run with users being associated with only one project. This
|
|
||||||
is a more conservative and simpler choice both for administration and
|
|
||||||
for users. Administratively, if a user reports a problem with an
|
|
||||||
instance or quota, it is obvious which project this relates to. Users
|
|
||||||
needn't worry about what project they are acting in if they are only in
|
|
||||||
one project. However, note that, by default, any user can affect the
|
|
||||||
resources of any other user within their project. It is also possible to
|
|
||||||
associate users with multiple projects if that makes sense for your
|
|
||||||
organization.
|
|
||||||
|
|
||||||
Associating existing users with an additional project or removing them
|
|
||||||
from an older project is done from the :guilabel:`Projects` page of the
|
|
||||||
dashboard by selecting :guilabel:`Manage Members` from the
|
|
||||||
:guilabel:`Actions` column, as shown in the screenshot below.
|
|
||||||
|
|
||||||
From this view, you can do a number of useful things, as well as a few
|
|
||||||
dangerous ones.
|
|
||||||
|
|
||||||
The first column of this form, named :guilabel:`All Users`, includes a list of
|
|
||||||
all the users in your cloud who are not already associated with this
|
|
||||||
project. The second column shows all the users who are. These lists can
|
|
||||||
be quite long, but they can be limited by typing a substring of the
|
|
||||||
username you are looking for in the filter field at the top of the
|
|
||||||
column.
|
|
||||||
|
|
||||||
From here, click the :guilabel:`+` icon to add users to the project.
|
|
||||||
Click the :guilabel:`-` to remove them.
|
|
||||||
|
|
||||||
.. figure:: figures/edit_project_member.png
|
|
||||||
:alt: Edit Project Members tab
|
|
||||||
|
|
||||||
The dangerous possibility comes with the ability to change member roles.
|
|
||||||
This is the dropdown list below the username in the
|
|
||||||
:guilabel:`Project Members` list. In virtually all cases,
|
|
||||||
this value should be set to :guilabel:`Member`. This example purposefully
|
|
||||||
shows an administrative user where this value is ``admin``.
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
The admin is global, not per project, so granting a user the ``admin``
|
|
||||||
role in any project gives the user administrative rights across the
|
|
||||||
whole cloud.
|
|
||||||
|
|
||||||
Typical use is to only create administrative users in a single project,
|
|
||||||
by convention the admin project, which is created by default during
|
|
||||||
cloud setup. If your administrative users also use the cloud to launch
|
|
||||||
and manage instances, it is strongly recommended that you use separate
|
|
||||||
user accounts for administrative access and normal operations and that
|
|
||||||
they be in distinct projects.
|
|
||||||
|
|
||||||
Customizing Authorization
|
|
||||||
-------------------------
|
|
||||||
|
|
||||||
The default :term:`authorization` settings allow only administrative users
|
|
||||||
to create resources on behalf of a different project.
|
|
||||||
OpenStack handles two kinds of authorization policies:
|
|
||||||
|
|
||||||
Operation based
|
|
||||||
Policies specify access criteria for specific operations, possibly
|
|
||||||
with fine-grained control over specific attributes.
|
|
||||||
|
|
||||||
Resource based
|
|
||||||
Whether access to a specific resource might be granted or not
|
|
||||||
according to the permissions configured for the resource (currently
|
|
||||||
available only for the network resource). The actual authorization
|
|
||||||
policies enforced in an OpenStack service vary from deployment to
|
|
||||||
deployment.
|
|
||||||
|
|
||||||
The policy engine reads entries from the ``policy.json`` file. The
|
|
||||||
actual location of this file might vary from distribution to
|
|
||||||
distribution: for nova, it is typically in ``/etc/nova/policy.json``.
|
|
||||||
You can update entries while the system is running, and you do not have
|
|
||||||
to restart services. Currently, the only way to update such policies is
|
|
||||||
to edit the policy file.
|
|
||||||
|
|
||||||
The OpenStack service's policy engine matches a policy directly. A rule
|
|
||||||
indicates evaluation of the elements of such policies. For instance, in
|
|
||||||
a ``compute:create: "rule:admin_or_owner"`` statement, the policy is
|
|
||||||
``compute:create``, and the rule is ``admin_or_owner``.
|
|
||||||
|
|
||||||
Policies are triggered by an OpenStack policy engine whenever one of
|
|
||||||
them matches an OpenStack API operation or a specific attribute being
|
|
||||||
used in a given operation. For instance, the engine tests the
|
|
||||||
``compute:create`` policy every time a user sends a
|
|
||||||
``POST /v2/{tenant_id}/servers`` request to the OpenStack Compute API
|
|
||||||
server. Policies can be also related to specific :term:`API extensions
|
|
||||||
<API extension>`. For instance, if a user needs an extension like
|
|
||||||
``compute_extension:rescue``, the attributes defined by the provider
|
|
||||||
extensions trigger the rule test for that operation.
|
|
||||||
|
|
||||||
An authorization policy can be composed of one or more rules. If more
|
|
||||||
rules are specified, evaluation of the policy succeeds if any of the rules
|
|
||||||
evaluates successfully; if an API operation matches multiple policies,
|
|
||||||
then all the policies must evaluate successfully. Also, authorization
|
|
||||||
rules are recursive. Once a rule is matched, the rule(s) can be resolved
|
|
||||||
to another rule, until a terminal rule is reached. These are the rules
|
|
||||||
defined:
|
|
||||||
|
|
||||||
Role-based rules
|
|
||||||
Evaluate successfully if the user submitting the request has the
|
|
||||||
specified role. For instance, ``"role:admin"`` is successful if the
|
|
||||||
user submitting the request is an administrator.
|
|
||||||
|
|
||||||
Field-based rules
|
|
||||||
Evaluate successfully if a field of the resource specified in the
|
|
||||||
current request matches a specific value. For instance,
|
|
||||||
``"field:networks:shared=True"`` is successful if the attribute
|
|
||||||
shared of the network resource is set to ``true``.
|
|
||||||
|
|
||||||
Generic rules
|
|
||||||
Compare an attribute in the resource with an attribute extracted
|
|
||||||
from the user's security credentials and evaluates successfully if
|
|
||||||
the comparison is successful. For instance,
|
|
||||||
``"tenant_id:%(tenant_id)s"`` is successful if the tenant identifier
|
|
||||||
in the resource is equal to the tenant identifier of the user
|
|
||||||
submitting the request.
|
|
||||||
|
|
||||||
Here are snippets of the default nova ``policy.json`` file:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
{
|
|
||||||
"context_is_admin": "role:admin",
|
|
||||||
"admin_or_owner": "is_admin:True", "project_id:%(project_id)s", ~~~~(1)~~~~
|
|
||||||
"default": "rule:admin_or_owner", ~~~~(2)~~~~
|
|
||||||
"compute:create": "",
|
|
||||||
"compute:create:attach_network": "",
|
|
||||||
"compute:create:attach_volume": "",
|
|
||||||
"compute:get_all": "",
|
|
||||||
"admin_api": "is_admin:True",
|
|
||||||
"compute_extension:accounts": "rule:admin_api",
|
|
||||||
"compute_extension:admin_actions": "rule:admin_api",
|
|
||||||
"compute_extension:admin_actions:pause": "rule:admin_or_owner",
|
|
||||||
"compute_extension:admin_actions:unpause": "rule:admin_or_owner",
|
|
||||||
...
|
|
||||||
"compute_extension:admin_actions:migrate": "rule:admin_api",
|
|
||||||
"compute_extension:aggregates": "rule:admin_api",
|
|
||||||
"compute_extension:certificates": "",
|
|
||||||
...
|
|
||||||
"compute_extension:flavorextraspecs": "",
|
|
||||||
"compute_extension:flavormanage": "rule:admin_api", ~~~~(3)~~~~
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
1. Shows a rule that evaluates successfully if the current user is an
|
|
||||||
administrator or the owner of the resource specified in the request
|
|
||||||
(tenant identifier is equal).
|
|
||||||
|
|
||||||
2. Shows the default policy, which is always evaluated if an API
|
|
||||||
operation does not match any of the policies in ``policy.json``.
|
|
||||||
|
|
||||||
3. Shows a policy restricting the ability to manipulate flavors to
|
|
||||||
administrators using the Admin API only.
|
|
||||||
|
|
||||||
In some cases, some operations should be restricted to administrators
|
|
||||||
only. Therefore, as a further example, let us consider how this sample
|
|
||||||
policy file could be modified in a scenario where we enable users to
|
|
||||||
create their own flavors:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
"compute_extension:flavormanage": "",
|
|
||||||
|
|
||||||
Users Who Disrupt Other Users
|
|
||||||
-----------------------------
|
|
||||||
|
|
||||||
Users on your cloud can disrupt other users, sometimes intentionally and
|
|
||||||
maliciously and other times by accident. Understanding the situation
|
|
||||||
allows you to make a better decision on how to handle the
|
|
||||||
disruption.
|
|
||||||
|
|
||||||
For example, a group of users have instances that are utilizing a large
|
|
||||||
amount of compute resources for very compute-intensive tasks. This is
|
|
||||||
driving the load up on compute nodes and affecting other users. In this
|
|
||||||
situation, review your user use cases. You may find that high compute
|
|
||||||
scenarios are common, and should then plan for proper segregation in
|
|
||||||
your cloud, such as host aggregation or regions.
|
|
||||||
|
|
||||||
Another example is a user consuming a very large amount of bandwidth.
|
|
||||||
Again, the key is to understand what the user is doing.
|
|
||||||
If she naturally needs a high amount of bandwidth,
|
|
||||||
you might have to limit her transmission rate so as not to
|
|
||||||
affect other users or move her to an area with more bandwidth available.
|
|
||||||
On the other hand, maybe her instance has been hacked and is part of a
|
|
||||||
botnet launching DDOS attacks. Resolution of this issue is the same as
|
|
||||||
though any other server on your network has been hacked. Contact the
|
|
||||||
user and give her time to respond. If she doesn't respond, shut down the
|
|
||||||
instance.
|
|
||||||
|
|
||||||
A final example is if a user is hammering cloud resources repeatedly.
|
|
||||||
Contact the user and learn what he is trying to do. Maybe he doesn't
|
|
||||||
understand that what he's doing is inappropriate, or maybe there is an
|
|
||||||
issue with the resource he is trying to access that is causing his
|
|
||||||
requests to queue or lag.
|
|
@ -1,410 +0,0 @@
|
|||||||
=======
|
|
||||||
Preface
|
|
||||||
=======
|
|
||||||
|
|
||||||
OpenStack is an open source platform that lets you build an
|
|
||||||
:term:`Infrastructure-as-a-Service (IaaS)` cloud that runs on commodity
|
|
||||||
hardware.
|
|
||||||
|
|
||||||
Introduction to OpenStack
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
OpenStack believes in open source, open design, and open development,
|
|
||||||
all in an open community that encourages participation by anyone. The
|
|
||||||
long-term vision for OpenStack is to produce a ubiquitous open source
|
|
||||||
cloud computing platform that meets the needs of public and private
|
|
||||||
cloud providers regardless of size. OpenStack services control large
|
|
||||||
pools of compute, storage, and networking resources throughout a data
|
|
||||||
center.
|
|
||||||
|
|
||||||
The technology behind OpenStack consists of a series of interrelated
|
|
||||||
projects delivering various components for a cloud infrastructure
|
|
||||||
solution. Each service provides an open API so that all of these
|
|
||||||
resources can be managed through a dashboard that gives administrators
|
|
||||||
control while empowering users to provision resources through a web
|
|
||||||
interface, a command-line client, or software development kits that
|
|
||||||
support the API. Many OpenStack APIs are extensible, meaning you can
|
|
||||||
keep compatibility with a core set of calls while providing access to
|
|
||||||
more resources and innovating through API extensions. The OpenStack
|
|
||||||
project is a global collaboration of developers and cloud computing
|
|
||||||
technologists. The project produces an open standard cloud computing
|
|
||||||
platform for both public and private clouds. By focusing on ease of
|
|
||||||
implementation, massive scalability, a variety of rich features, and
|
|
||||||
tremendous extensibility, the project aims to deliver a practical and
|
|
||||||
reliable cloud solution for all types of organizations.
|
|
||||||
|
|
||||||
Getting Started with OpenStack
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
As an open source project, one of the unique aspects of OpenStack is
|
|
||||||
that it has many different levels at which you can begin to engage with
|
|
||||||
it—you don't have to do everything yourself.
|
|
||||||
|
|
||||||
Using OpenStack
|
|
||||||
---------------
|
|
||||||
|
|
||||||
You could ask, "Do I even need to build a cloud?" If you want to start
|
|
||||||
using a compute or storage service by just swiping your credit card, you
|
|
||||||
can go to eNovance, HP, Rackspace, or other organizations to start using
|
|
||||||
their public OpenStack clouds. Using their OpenStack cloud resources is
|
|
||||||
similar to accessing the publicly available Amazon Web Services Elastic
|
|
||||||
Compute Cloud (EC2) or Simple Storage Solution (S3).
|
|
||||||
|
|
||||||
Plug and Play OpenStack
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
However, the enticing part of OpenStack might be to build your own
|
|
||||||
private cloud, and there are several ways to accomplish this goal.
|
|
||||||
Perhaps the simplest of all is an appliance-style solution. You purchase
|
|
||||||
an appliance, unpack it, plug in the power and the network, and watch it
|
|
||||||
transform into an OpenStack cloud with minimal additional configuration.
|
|
||||||
|
|
||||||
However, hardware choice is important for many applications, so if that
|
|
||||||
applies to you, consider that there are several software distributions
|
|
||||||
available that you can run on servers, storage, and network products of
|
|
||||||
your choosing. Canonical (where OpenStack replaced Eucalyptus as the
|
|
||||||
default cloud option in 2011), Red Hat, and SUSE offer enterprise
|
|
||||||
OpenStack solutions and support. You may also want to take a look at
|
|
||||||
some of the specialized distributions, such as those from Rackspace,
|
|
||||||
Piston, SwiftStack, or Cloudscaling.
|
|
||||||
|
|
||||||
Alternatively, if you want someone to help guide you through the
|
|
||||||
decisions about the underlying hardware or your applications, perhaps
|
|
||||||
adding in a few features or integrating components along the way,
|
|
||||||
consider contacting one of the system integrators with OpenStack
|
|
||||||
experience, such as Mirantis or Metacloud.
|
|
||||||
|
|
||||||
If your preference is to build your own OpenStack expertise internally,
|
|
||||||
a good way to kick-start that might be to attend or arrange a training
|
|
||||||
session. The OpenStack Foundation has a `Training
|
|
||||||
Marketplace <https://www.openstack.org/marketplace/training>`_ where you
|
|
||||||
can look for nearby events. Also, the OpenStack community is `working to
|
|
||||||
produce <https://wiki.openstack.org/wiki/Training-guides>`_ open source
|
|
||||||
training materials.
|
|
||||||
|
|
||||||
Roll Your Own OpenStack
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
However, this guide has a different audience—those seeking flexibility
|
|
||||||
from the OpenStack framework by deploying do-it-yourself solutions.
|
|
||||||
|
|
||||||
OpenStack is designed for horizontal scalability, so you can easily add
|
|
||||||
new compute, network, and storage resources to grow your cloud over
|
|
||||||
time. In addition to the pervasiveness of massive OpenStack public
|
|
||||||
clouds, many organizations, such as PayPal, Intel, and Comcast, build
|
|
||||||
large-scale private clouds. OpenStack offers much more than a typical
|
|
||||||
software package because it lets you integrate a number of different
|
|
||||||
technologies to construct a cloud. This approach provides great
|
|
||||||
flexibility, but the number of options might be daunting at first.
|
|
||||||
|
|
||||||
Who This Book Is For
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
This book is for those of you starting to run OpenStack clouds as well
|
|
||||||
as those of you who were handed an operational one and want to keep it
|
|
||||||
running well. Perhaps you're on a DevOps team, perhaps you are a system
|
|
||||||
administrator starting to dabble in the cloud, or maybe you want to get
|
|
||||||
on the OpenStack cloud team at your company. This book is for all of
|
|
||||||
you.
|
|
||||||
|
|
||||||
This guide assumes that you are familiar with a Linux distribution that
|
|
||||||
supports OpenStack, SQL databases, and virtualization. You must be
|
|
||||||
comfortable administering and configuring multiple Linux machines for
|
|
||||||
networking. You must install and maintain an SQL database and
|
|
||||||
occasionally run queries against it.
|
|
||||||
|
|
||||||
One of the most complex aspects of an OpenStack cloud is the networking
|
|
||||||
configuration. You should be familiar with concepts such as DHCP, Linux
|
|
||||||
bridges, VLANs, and iptables. You must also have access to a network
|
|
||||||
hardware expert who can configure the switches and routers required in
|
|
||||||
your OpenStack cloud.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Cloud computing is quite an advanced topic, and this book requires a
|
|
||||||
lot of background knowledge. However, if you are fairly new to cloud
|
|
||||||
computing, we recommend that you make use of the :doc:`common/glossary`
|
|
||||||
at the back of the book, as well as the online documentation for OpenStack
|
|
||||||
and additional resources mentioned in this book in :doc:`app-resources`.
|
|
||||||
|
|
||||||
Further Reading
|
|
||||||
---------------
|
|
||||||
|
|
||||||
There are other books on the `OpenStack documentation
|
|
||||||
website <https://docs.openstack.org>`_ that can help you get the job
|
|
||||||
done.
|
|
||||||
|
|
||||||
Installation Tutorials and Guides
|
|
||||||
Describes a manual installation process, performed by hand without
|
|
||||||
automation, for multiple distributions based on a packaging system:
|
|
||||||
|
|
||||||
- `OpenStack Installation Tutorial for openSUSE and SUSE Linux Enterprise
|
|
||||||
<https://docs.openstack.org/ocata/install-guide-obs/>`_
|
|
||||||
|
|
||||||
- `OpenStack Installation Tutorial for Red Hat Enterprise Linux and CentOS
|
|
||||||
<https://docs.openstack.org/ocata/install-guide-rdo/>`_
|
|
||||||
|
|
||||||
- `OpenStack Installation Tutorial for Ubuntu
|
|
||||||
<https://docs.openstack.org/ocata/install-guide-ubuntu/>`_
|
|
||||||
|
|
||||||
`OpenStack Configuration Reference <https://docs.openstack.org/ocata/config-reference/>`_
|
|
||||||
Contains a reference listing of all configuration options for core
|
|
||||||
and integrated OpenStack services by release version
|
|
||||||
|
|
||||||
`OpenStack Architecture Design Guide <https://docs.openstack.org/arch-design/>`_
|
|
||||||
Contains guidelines for designing an OpenStack cloud
|
|
||||||
|
|
||||||
`OpenStack Administrator Guide <https://docs.openstack.org/admin-guide/>`_
|
|
||||||
Contains how-to information for managing an OpenStack cloud as
|
|
||||||
needed for your use cases, such as storage, computing, or
|
|
||||||
software-defined-networking
|
|
||||||
|
|
||||||
`OpenStack High Availability Guide <https://docs.openstack.org/ha-guide/index.html>`_
|
|
||||||
Describes potential strategies for making your OpenStack services
|
|
||||||
and related controllers and data stores highly available
|
|
||||||
|
|
||||||
`OpenStack Security Guide <https://docs.openstack.org/security-guide/>`_
|
|
||||||
Provides best practices and conceptual information about securing an
|
|
||||||
OpenStack cloud
|
|
||||||
|
|
||||||
`Virtual Machine Image Guide <https://docs.openstack.org/image-guide/>`_
|
|
||||||
Shows you how to obtain, create, and modify virtual machine images
|
|
||||||
that are compatible with OpenStack
|
|
||||||
|
|
||||||
`OpenStack End User Guide <https://docs.openstack.org/user-guide/>`_
|
|
||||||
Shows OpenStack end users how to create and manage resources in an
|
|
||||||
OpenStack cloud with the OpenStack dashboard and OpenStack client
|
|
||||||
commands
|
|
||||||
|
|
||||||
`OpenStack Networking Guide <https://docs.openstack.org/ocata/networking-guide/>`_
|
|
||||||
This guide targets OpenStack administrators seeking to deploy and
|
|
||||||
manage OpenStack Networking (neutron).
|
|
||||||
|
|
||||||
`OpenStack API Guide <https://developer.openstack.org/api-guide/quick-start/>`_
|
|
||||||
A brief overview of how to send REST API requests to endpoints for
|
|
||||||
OpenStack services
|
|
||||||
|
|
||||||
How This Book Is Organized
~~~~~~~~~~~~~~~~~~~~~~~~~~

This book contains several parts that show best practices and tips for
the repeated operations involved in running OpenStack clouds.

:doc:`ops-lay-of-the-land`
   This chapter helps you get your hands around your OpenStack cloud
   through command-line tools and an understanding of what is already
   set up in your cloud.

:doc:`ops-projects-users`
   This chapter walks through user-enabling processes that all admins
   must face to manage users, give them quotas to parcel out resources,
   and so on.

:doc:`ops-user-facing-operations`
   This chapter shows you how to use OpenStack cloud resources and how
   to train your users.

:doc:`ops-maintenance`
   This chapter goes into the common failures that the authors have
   seen while running clouds in production, including troubleshooting.

:doc:`ops-network-troubleshooting`
   Because network troubleshooting is especially difficult with virtual
   resources, this chapter is chock-full of helpful tips and tricks for
   tracing network traffic, finding the root cause of networking
   failures, and debugging related services, such as DHCP and DNS.

:doc:`ops-logging-monitoring`
   This chapter shows you where OpenStack places logs and how to best
   read and manage logs for monitoring purposes.

:doc:`ops-backup-recovery`
   This chapter describes what you need to back up within OpenStack as
   well as best practices for recovering backups.

:doc:`ops-customize`
   For readers who need to get a specialized feature into OpenStack,
   this chapter describes how to use DevStack to write custom
   middleware or a custom scheduler to rebalance your resources.

:doc:`ops-advanced-configuration`
   Much of OpenStack is driver-oriented, so you can plug in different
   solutions to the base set of services. This chapter describes some
   advanced configuration topics.

:doc:`ops-upgrades`
   This chapter provides upgrade information based on the architectures
   used in this book.

**Back matter:**

:doc:`app-usecases`
   You can read a small selection of use cases from the OpenStack
   community with some technical details and further resources.

:doc:`app-crypt`
   These are shared legendary tales of image disappearances, VM
   massacres, and crazy troubleshooting techniques that result in
   hard-learned lessons and wisdom.

:doc:`app-roadmaps`
   Read about how to track the OpenStack roadmap through the open and
   transparent development processes.

:doc:`app-resources`
   So many OpenStack resources are available online because of the
   fast-moving nature of the project, but there are also resources
   listed here that the authors found helpful while learning
   themselves.

:doc:`common/glossary`
   A list of terms used in this book is included, which is a subset of
   the larger OpenStack glossary available online.

Why and How We Wrote This Book
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

We wrote this book because we have deployed and maintained OpenStack
clouds for at least a year and we wanted to share this knowledge with
others. After months of being the point people for an OpenStack cloud,
we also wanted to have a document to hand to our system administrators
so that they'd know how to operate the cloud on a daily basis, both
reactively and proactively. We wanted to provide more detailed
technical information about the decisions that deployers make along the
way.

We wrote this book to help you:

- Design and create an architecture for your first nontrivial OpenStack
  cloud. After you read this guide, you'll know which questions to ask
  and how to organize your compute, networking, and storage resources
  and the associated software packages.

- Perform the day-to-day tasks required to administer a cloud.

We wrote this book in a book sprint, which is a facilitated, rapid
production method for books. For more information, see the
`BookSprints site <http://www.booksprints.net/>`_. Your authors cobbled
this book together in five days during February 2013, fueled by caffeine
and the best takeout food that Austin, Texas, could offer.

On the first day, we filled white boards with colorful sticky notes to
start to shape this nebulous book about how to architect and operate
clouds:

.. figure:: figures/osog_00in01.png
   :figwidth: 100%

We wrote furiously from our own experiences and bounced ideas between
each other. At regular intervals we reviewed the shape and organization
of the book and further molded it, leading to what you see today.

The team includes:

Tom Fifield
   After learning about scalability in computing from particle physics
   experiments, such as ATLAS at the Large Hadron Collider (LHC) at
   CERN, Tom worked on OpenStack clouds in production to support the
   Australian public research sector. Tom currently serves as an
   OpenStack community manager and works on OpenStack documentation in
   his spare time.

Diane Fleming
   Diane works tirelessly on the OpenStack API documentation. She
   helped out wherever she could on this project.

Anne Gentle
   Anne is the documentation coordinator for OpenStack and also served
   as an individual contributor to the Google Documentation Summit in
   2011, working with the OpenStreetMap team. She has worked on book
   sprints in the past, with FLOSS Manuals' Adam Hyde facilitating.
   Anne lives in Austin, Texas.

Lorin Hochstein
   An academic turned software-developer-slash-operator, Lorin worked
   as the lead architect for Cloud Services at Nimbis Services, where
   he deploys OpenStack for technical computing applications. He has
   been working with OpenStack since the Cactus release. Previously, he
   worked on high-performance computing extensions for OpenStack at the
   University of Southern California's Information Sciences Institute
   (USC-ISI).

Adam Hyde
   Adam facilitated this book sprint. He also founded the book sprint
   methodology and is the most experienced book-sprint facilitator
   around. See `BookSprints <http://www.booksprints.net>`_ for more
   information. Adam founded FLOSS Manuals, a community of some 3,000
   individuals developing Free Manuals about Free Software. He is also the
   founder and project manager for Booktype, an open source project for
   writing, editing, and publishing books online and in print.

Jonathan Proulx
   Jon has been piloting an OpenStack cloud as a senior technical
   architect at the MIT Computer Science and Artificial Intelligence
   Lab so that his researchers can have as much computing power as they
   need. He started contributing to OpenStack documentation and
   reviewing the documentation so that he could accelerate his
   learning.

Everett Toews
   Everett is a developer advocate at Rackspace making OpenStack and
   the Rackspace Cloud easy to use. Sometimes developer, sometimes
   advocate, and sometimes operator, he's built web applications,
   taught workshops, given presentations around the world, and deployed
   OpenStack for production use by academia and business.

Joe Topjian
   Joe has designed and deployed several clouds at Cybera, a nonprofit
   where they are building e-infrastructure to support entrepreneurs
   and local researchers in Alberta, Canada. He also actively maintains
   and operates these clouds as a systems architect, and his
   experiences have generated a wealth of troubleshooting skills for
   cloud environments.

OpenStack community members
   Many individual efforts keep a community book alive. Our community
   members updated content for this book year-round. Also, a year after
   the first sprint, Jon Proulx hosted a second two-day mini-sprint at
   MIT with the goal of updating the book for the latest release. Since
   the book's inception, more than 30 contributors have supported this
   book. We have a tool chain for reviews, continuous builds, and
   translations. Writers and developers continuously review patches,
   enter doc bugs, edit content, and fix doc bugs. We want to recognize
   their efforts!

   The following people have contributed to this book: Akihiro Motoki,
   Alejandro Avella, Alexandra Settle, Andreas Jaeger, Andy McCallum,
   Benjamin Stassart, Chandan Kumar, Chris Ricker, David Cramer, David
   Wittman, Denny Zhang, Emilien Macchi, Gauvain Pocentek, Ignacio
   Barrio, James E. Blair, Jay Clark, Jeff White, Jeremy Stanley, K
   Jonathan Harker, KATO Tomoyuki, Lana Brindley, Laura Alves, Lee Li,
   Lukasz Jernas, Mario B. Codeniera, Matthew Kassawara, Michael Still,
   Monty Taylor, Nermina Miller, Nigel Williams, Phil Hopkins, Russell
   Bryant, Sahid Orentino Ferdjaoui, Sandy Walsh, Sascha Peilicke, Sean
   M. Collins, Sergey Lukjanov, Shilla Saebi, Stephen Gordon, Summer
   Long, Uwe Stuehler, Vaibhav Bhatkar, Veronica Musso, Ying Chun
   "Daisy" Guo, Zhengguang Ou, and ZhiQiang Fan.

How to Contribute to This Book
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The genesis of this book was an in-person event, but now that the book
is in your hands, we want you to contribute to it. OpenStack
documentation follows the coding principles of iterative work, with bug
logging, investigating, and fixing. We also store the source content on
GitHub and invite collaborators through the OpenStack Gerrit
installation, which offers reviews. For the O'Reilly edition of this
book, we are using the company's Atlas system, which also stores source
content on GitHub and enables collaboration among contributors.
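
As a rough sketch of that workflow (the repository name is the GitHub
mirror mentioned above; the commands assume you have the ``git-review``
tool installed and a Gerrit account configured):

.. code-block:: console

   # Clone the manuals repository, make a change on a topic branch,
   # and submit it to Gerrit for review.
   $ git clone https://github.com/openstack/openstack-manuals
   $ cd openstack-manuals
   $ git checkout -b my-doc-fix
   $ git commit -a -m "Clarify a section of the Operations Guide"
   $ git review
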
Learn more about how to contribute to the OpenStack docs at the `OpenStack
Documentation Contributor Guide <https://docs.openstack.org/contributor-guide/>`_.

If you find a bug and can't fix it or aren't sure it's really a doc bug,
log a bug at `OpenStack
Manuals <https://bugs.launchpad.net/openstack-manuals>`_. Tag the bug
under Extra options with the ``ops-guide`` tag to indicate that the bug
is in this guide. You can assign the bug to yourself if you know how to
fix it. Also, a member of the OpenStack doc-core team can triage the doc
bug.

@@ -29,12 +29,11 @@ done
 # PDF targets for Install guides are dealt in build-install-guides-rst.sh
 PDF_TARGETS=( 'arch-design'\
               'ha-guide' \
-              'image-guide'\
-              'ops-guide' )
+              'image-guide')
 
 # Note that these guides are only build for master branch
-for guide in admin-guide arch-design contributor-guide \
-             ha-guide image-guide ops-guide; do
+for guide in arch-design contributor-guide \
+             ha-guide image-guide; do
     if [[ ${PDF_TARGETS[*]} =~ $guide ]]; then
         tools/build-rst.sh doc/$guide --build build \
             --target $guide $LINKCHECK $PDF_OPTION
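
(Not part of the patch: the loop above keeps working after the change because
the membership test uses bash's regex match against the space-joined array
expansion. A standalone sketch of that idiom, with illustrative values:)

    #!/bin/bash
    # `[[ ${ARRAY[*]} =~ $item ]]` succeeds when $item matches anywhere in
    # the space-joined expansion of ARRAY (a regex/substring match, not an
    # exact element comparison).
    PDF_TARGETS=( 'arch-design' 'ha-guide' 'image-guide' )

    for guide in arch-design contributor-guide ha-guide image-guide; do
        if [[ ${PDF_TARGETS[*]} =~ $guide ]]; then
            echo "$guide: build with PDF"
        else
            echo "$guide: HTML only"
        fi
    done
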
@@ -33,7 +33,6 @@ function copy_to_branch {
     rm -f publish-docs/$BRANCH/draft-index.html
     # We don't need these draft guides on the branch
     rm -rf publish-docs/$BRANCH/arch-design-to-archive
-    rm -rf publish-docs/$BRANCH/ops-guide
 
     for f in $(find publish-docs/$BRANCH -name "atom.xml"); do
         sed -i -e "s|/draft/|/$BRANCH/|g" $f
@@ -46,6 +46,9 @@ redirectmatch 301 "^/releases.*$" http://releases.openstack.org$1
 # Redirect removed user guide
 redirectmatch 301 /user-guide/.*$ /user/
 
+# Redirect removed ops guide
+redirectmatch 301 /ops-guide/.*$ /admin/
+
 # Redirect changed directory name in the Contributor Guide
 redirect 301 /contributor-guide/ui-text-guidelines.html /contributor-guide/ux-ui-guidelines/ui-text-guidelines.html
 redirect 301 /contributor-guide/ui-text-guidelines /contributor-guide/ux-ui-guidelines
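
(Not part of the patch: once the redirect above is deployed, a quick way to
check it is to request the old URL and confirm the server answers with an
HTTP 301 whose Location header points at /admin/; the exact headers depend
on the web server configuration.)

    $ curl -sI https://docs.openstack.org/ops-guide/
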