Report backend state in service list

Currently, Cinder does not report the backend state in the service
list: operators only know that the cinder-volume process is up, but
cannot tell whether the backend storage device is healthy. Users can
still create volumes, which then fail over and over again.

This blueprint proposes getting the backend state directly from the
driver, so that operators or a management system can query it via the
service list.

This feature improves the maintainability of Cinder.
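
For illustration (not part of the original commit message; the client
flag is real but the exact output is an assumption), an operator on a
client that supports microversion 3.49 could run:

    $ cinder --os-volume-api-version 3.49 service-list

and read the backend_state value alongside each cinder-volume entry.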

Change-Id: I561dca3ef7c1901401621bc112389dbd178a907e
Implements: blueprint report-backend-state-in-service-list
wanghao 2017-12-09 14:58:01 +08:00
parent 9054fb0df3
commit 0dc8390e11
9 changed files with 117 additions and 2 deletions


@@ -34,6 +34,7 @@ from cinder.scheduler import rpcapi as scheduler_rpcapi
from cinder import utils
from cinder import volume
from cinder.volume import rpcapi as volume_rpcapi
from cinder.volume import utils as volume_utils
CONF = cfg.CONF
@@ -73,6 +74,16 @@ class ServiceController(wsgi.Controller):
filters['binary'] = req.GET['binary']
services = objects.ServiceList.get_all(context, filters)
# Get backend state from scheduler
if req.api_version_request.matches(mv.BACKEND_STATE_REPORT):
backend_state_map = {}
scheduler_api = self.rpc_apis[constants.SCHEDULER_BINARY]
pools = scheduler_api.get_pools(context)
for pool in pools:
backend_name = volume_utils.extract_host(pool.get("name"))
back_state = pool.get('capabilities', {}).get('backend_state',
'up')
backend_state_map[backend_name] = back_state
svcs = []
for svc in services:
@@ -95,6 +106,10 @@ class ServiceController(wsgi.Controller):
'status': active, 'state': art,
'updated_at': updated_at}
if (req.api_version_request.matches(mv.BACKEND_STATE_REPORT) and
svc.binary == constants.VOLUME_BINARY):
ret_fields['backend_state'] = backend_state_map.get(svc.host)
# On CLUSTER_SUPPORT we added cluster support
if req.api_version_request.matches(mv.CLUSTER_SUPPORT):
ret_fields['cluster'] = svc.cluster_name
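
An illustrative sketch (not part of the patch) of how the map above is
keyed; the pool names are made up:

    # Pool names reported by the scheduler collapse to the ``host``
    # attribute of the matching cinder-volume service, because
    # extract_host() strips the pool suffix by default.
    from cinder.volume import utils as volume_utils

    pools = [
        {"name": "host1@lvm#pool1", "capabilities": {"backend_state": "up"}},
        {"name": "host2@lvm#pool1", "capabilities": {}},  # absent -> 'up'
    ]
    backend_state_map = {}
    for pool in pools:
        backend_name = volume_utils.extract_host(pool.get("name"))
        back_state = pool.get('capabilities', {}).get('backend_state', 'up')
        backend_state_map[backend_name] = back_state

    print(backend_state_map)  # {'host1@lvm': 'up', 'host2@lvm': 'up'}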


@@ -135,6 +135,8 @@ VOLUME_CREATE_FROM_BACKUP = '3.47'
VOLUME_SHARED_TARGETS_AND_SERVICE_FIELDS = '3.48'
BACKEND_STATE_REPORT = '3.49'
def get_mv_header(version):
"""Gets a formatted HTTP microversion header.


@@ -112,6 +112,7 @@ REST_API_VERSION_HISTORY = """
* 3.46 - Support create volume by Nova specific image (0 size image).
* 3.47 - Support create volume from backup.
* 3.48 - Add ``shared_targets`` and ``service_uuid`` fields to volume.
* 3.49 - Support reporting backend storage state in service list.
"""
# The minimum and maximum versions of the API supported
@@ -119,7 +120,7 @@ REST_API_VERSION_HISTORY = """
# minimum version of the API supported.
# Explicitly using /v2 endpoints will still work
_MIN_API_VERSION = "3.0"
_MAX_API_VERSION = "3.48"
_MAX_API_VERSION = "3.49"
_LEGACY_API_VERSION2 = "2.0"
UPDATED = "2017-09-19T20:18:14Z"


@@ -390,3 +390,7 @@ Support create volume from backup.
3.48
----
Add ``shared_targets`` and ``service_uuid`` fields to volume.

3.49
----
Support reporting backend storage state in service list.


@@ -191,7 +191,13 @@ def fake_utcnow(with_timezone=False):
return datetime.datetime(2012, 10, 29, 13, 42, 11, tzinfo=tzinfo)
def fake_get_pools(ctxt, filters=None):
return [{"name": "host1", "capabilities": {"backend_state": "up"}},
{"name": "host2", "capabilities": {"backend_state": "down"}}]
@ddt.ddt
@mock.patch('cinder.scheduler.rpcapi.SchedulerAPI.get_pools', fake_get_pools)
@mock.patch('cinder.db.service_get_all', fake_service_get_all)
@mock.patch('cinder.db.service_get', fake_service_get)
@mock.patch('oslo_utils.timeutils.utcnow', fake_utcnow)
@@ -336,6 +342,63 @@ class ServicesTest(test.TestCase):
]}
self.assertEqual(response, res_dict)
def test_services_list_with_backend_state(self):
req = FakeRequest(version=mv.BACKEND_STATE_REPORT)
res_dict = self.controller.index(req)
response = {'services': [{'binary': 'cinder-scheduler',
'cluster': None,
'host': 'host1', 'zone': 'cinder',
'status': 'disabled', 'state': 'up',
'updated_at': datetime.datetime(
2012, 10, 29, 13, 42, 2)},
{'binary': 'cinder-volume',
'cluster': None,
'host': 'host1', 'zone': 'cinder',
'status': 'disabled', 'state': 'up',
'updated_at': datetime.datetime(
2012, 10, 29, 13, 42, 5),
'backend_state': 'up'},
{'binary': 'cinder-scheduler',
'cluster': 'cluster1',
'host': 'host2',
'zone': 'cinder',
'status': 'enabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 9, 19, 6, 55, 34)},
{'binary': 'cinder-volume',
'cluster': 'cluster1',
'host': 'host2',
'zone': 'cinder',
'status': 'disabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 9, 18, 8, 3, 38),
'backend_state': 'down'},
{'binary': 'cinder-volume',
'cluster': 'cluster2',
'host': 'host2',
'zone': 'cinder',
'status': 'disabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 10, 29, 13, 42, 5),
'backend_state': 'down'},
{'binary': 'cinder-volume',
'cluster': 'cluster2',
'host': 'host2',
'zone': 'cinder',
'status': 'enabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 9, 18, 8, 3, 38),
'backend_state': 'down'},
{'binary': 'cinder-scheduler',
'cluster': None,
'host': 'host2',
'zone': 'cinder',
'status': 'enabled', 'state': 'down',
'updated_at': None},
]}
self.assertEqual(response, res_dict)
def test_services_detail(self):
self.ext_mgr.extensions['os-extended-services'] = True
self.controller = services.ServiceController(self.ext_mgr)


@@ -276,7 +276,8 @@ class LVMVolumeDriver(driver.VolumeDriver):
total_volumes=total_volumes,
filter_function=self.get_filter_function(),
goodness_function=self.get_goodness_function(),
multiattach=False
multiattach=False,
backend_state='up'
))
data["pools"].append(single_pool)
data["shared_targets"] = False


@@ -0,0 +1,20 @@
====================================
Report backend state in service list
====================================

Currently, Cinder does not report the backend state to the service list:
operators only know that the cinder-volume process is up, but cannot tell
whether the backend storage device is healthy. Users can still create
volumes, which then fail over and over again. To make maintenance easier,
operators can query the storage device state via the service list and fix
problems more quickly. If the device state is *down*, volume creation will
fail.

To get the backend state, use the Block Storage API service list and run
this command:

.. code-block:: console

   $ openstack volume service list

The ``backend_state: up/down`` field is added to the response body of the
service list. This feature is supported since microversion 3.49.
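
The field can also be read straight from the REST API by sending the
microversion header; in this illustrative example, ``$TOKEN`` and
``$CINDER_ENDPOINT`` are placeholders for your credentials and Block
Storage v3 endpoint:

.. code-block:: console

   $ curl -s -H "X-Auth-Token: $TOKEN" \
          -H "OpenStack-API-Version: volume 3.49" \
          "$CINDER_ENDPOINT/os-services"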


@@ -45,6 +45,7 @@ Amazon EC2 Elastic Block Storage (EBS) offering.
blockstorage-volume-backups.rst
blockstorage-volume-migration.rst
blockstorage-volume-number-weigher.rst
blockstorage-report-backend-state.rst
.. _`Storage Decisions`: https://docs.openstack.org/arch-design/design-storage/design-storage-arch.html
.. _`OpenStack Operations Guide`: https://wiki.openstack.org/wiki/OpsGuide


@@ -0,0 +1,8 @@
---
features:
  - |
    Added ``backend_state: up/down`` to the response body of the service
    list when the context is admin. This helps operators or a cloud
    management system see the backend device state for every service.
    If the device state is *down*, the storage device has a problem;
    this extra information helps locate faults quickly.