Report backend state in service list

Currently, Cinder does not report the backend state in the service
list: operators only know that the cinder-volume process is up, but
cannot tell whether the backend storage device is healthy. Users can
still create volumes, which then fail over and over again.

This blueprint proposes getting the backend state directly from the
driver, so that operators or a management system can query it via the
service list.

This feature improves the maintainability of Cinder.
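
For illustration (not part of the original commit message; the client
flag is real but the exact output is an assumption), an operator on a
client that supports microversion 3.49 could run:

    $ cinder --os-volume-api-version 3.49 service-list

and read the backend_state value alongside each cinder-volume entry.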

Change-Id: I561dca3ef7c1901401621bc112389dbd178a907e
Implements: blueprint report-backend-state-in-service-list
wanghao 2017-12-09 14:58:01 +08:00
parent 9054fb0df3
commit 0dc8390e11
9 changed files with 117 additions and 2 deletions


@@ -34,6 +34,7 @@ from cinder.scheduler import rpcapi as scheduler_rpcapi
from cinder import utils
from cinder import volume
from cinder.volume import rpcapi as volume_rpcapi
from cinder.volume import utils as volume_utils
CONF = cfg.CONF
@@ -73,6 +74,16 @@ class ServiceController(wsgi.Controller):
filters['binary'] = req.GET['binary']
services = objects.ServiceList.get_all(context, filters)
# Get backend state from scheduler
if req.api_version_request.matches(mv.BACKEND_STATE_REPORT):
backend_state_map = {}
scheduler_api = self.rpc_apis[constants.SCHEDULER_BINARY]
pools = scheduler_api.get_pools(context)
for pool in pools:
backend_name = volume_utils.extract_host(pool.get("name"))
back_state = pool.get('capabilities', {}).get('backend_state',
'up')
backend_state_map[backend_name] = back_state
svcs = []
for svc in services:
@@ -95,6 +106,10 @@ class ServiceController(wsgi.Controller):
'status': active, 'state': art,
'updated_at': updated_at}
if (req.api_version_request.matches(mv.BACKEND_STATE_REPORT) and
svc.binary == constants.VOLUME_BINARY):
ret_fields['backend_state'] = backend_state_map.get(svc.host)
# On CLUSTER_SUPPORT we added cluster support
if req.api_version_request.matches(mv.CLUSTER_SUPPORT):
ret_fields['cluster'] = svc.cluster_name
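
An illustrative sketch (not part of the patch) of how the map above is
keyed; the pool names are made up:

    # Pool names reported by the scheduler collapse to the ``host``
    # attribute of the matching cinder-volume service, because
    # extract_host() strips the pool suffix by default.
    from cinder.volume import utils as volume_utils

    pools = [
        {"name": "host1@lvm#pool1", "capabilities": {"backend_state": "up"}},
        {"name": "host2@lvm#pool1", "capabilities": {}},  # absent -> 'up'
    ]
    backend_state_map = {}
    for pool in pools:
        backend_name = volume_utils.extract_host(pool.get("name"))
        back_state = pool.get('capabilities', {}).get('backend_state', 'up')
        backend_state_map[backend_name] = back_state

    print(backend_state_map)  # {'host1@lvm': 'up', 'host2@lvm': 'up'}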


@@ -135,6 +135,8 @@ VOLUME_CREATE_FROM_BACKUP = '3.47'
VOLUME_SHARED_TARGETS_AND_SERVICE_FIELDS = '3.48'
BACKEND_STATE_REPORT = '3.49'
def get_mv_header(version):
"""Gets a formatted HTTP microversion header.


@@ -112,6 +112,7 @@ REST_API_VERSION_HISTORY = """
* 3.46 - Support create volume by Nova specific image (0 size image).
* 3.47 - Support create volume from backup.
* 3.48 - Add ``shared_targets`` and ``service_uuid`` fields to volume.
* 3.49 - Support reporting backend storage state in service list.
"""
# The minimum and maximum versions of the API supported
@@ -119,7 +120,7 @@ REST_API_VERSION_HISTORY = """
# minimum version of the API supported.
# Explicitly using /v2 endpoints will still work
_MIN_API_VERSION = "3.0"
_MAX_API_VERSION = "3.48"
_MAX_API_VERSION = "3.49"
_LEGACY_API_VERSION2 = "2.0"
UPDATED = "2017-09-19T20:18:14Z"


@@ -390,3 +390,7 @@ Support create volume from backup.
3.48
----
Add ``shared_targets`` and ``service_uuid`` fields to volume.

3.49
----
Support reporting backend storage state in service list.


@@ -191,7 +191,13 @@ def fake_utcnow(with_timezone=False):
return datetime.datetime(2012, 10, 29, 13, 42, 11, tzinfo=tzinfo)
def fake_get_pools(ctxt, filters=None):
return [{"name": "host1", "capabilities": {"backend_state": "up"}},
{"name": "host2", "capabilities": {"backend_state": "down"}}]
@ddt.ddt
@mock.patch('cinder.scheduler.rpcapi.SchedulerAPI.get_pools', fake_get_pools)
@mock.patch('cinder.db.service_get_all', fake_service_get_all)
@mock.patch('cinder.db.service_get', fake_service_get)
@mock.patch('oslo_utils.timeutils.utcnow', fake_utcnow)
@@ -336,6 +342,63 @@ class ServicesTest(test.TestCase):
]}
self.assertEqual(response, res_dict)
def test_services_list_with_backend_state(self):
req = FakeRequest(version=mv.BACKEND_STATE_REPORT)
res_dict = self.controller.index(req)
response = {'services': [{'binary': 'cinder-scheduler',
'cluster': None,
'host': 'host1', 'zone': 'cinder',
'status': 'disabled', 'state': 'up',
'updated_at': datetime.datetime(
2012, 10, 29, 13, 42, 2)},
{'binary': 'cinder-volume',
'cluster': None,
'host': 'host1', 'zone': 'cinder',
'status': 'disabled', 'state': 'up',
'updated_at': datetime.datetime(
2012, 10, 29, 13, 42, 5),
'backend_state': 'up'},
{'binary': 'cinder-scheduler',
'cluster': 'cluster1',
'host': 'host2',
'zone': 'cinder',
'status': 'enabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 9, 19, 6, 55, 34)},
{'binary': 'cinder-volume',
'cluster': 'cluster1',
'host': 'host2',
'zone': 'cinder',
'status': 'disabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 9, 18, 8, 3, 38),
'backend_state': 'down'},
{'binary': 'cinder-volume',
'cluster': 'cluster2',
'host': 'host2',
'zone': 'cinder',
'status': 'disabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 10, 29, 13, 42, 5),
'backend_state': 'down'},
{'binary': 'cinder-volume',
'cluster': 'cluster2',
'host': 'host2',
'zone': 'cinder',
'status': 'enabled', 'state': 'down',
'updated_at': datetime.datetime(
2012, 9, 18, 8, 3, 38),
'backend_state': 'down'},
{'binary': 'cinder-scheduler',
'cluster': None,
'host': 'host2',
'zone': 'cinder',
'status': 'enabled', 'state': 'down',
'updated_at': None},
]}
self.assertEqual(response, res_dict)
def test_services_detail(self):
self.ext_mgr.extensions['os-extended-services'] = True
self.controller = services.ServiceController(self.ext_mgr)


@@ -276,7 +276,8 @@ class LVMVolumeDriver(driver.VolumeDriver):
total_volumes=total_volumes,
filter_function=self.get_filter_function(),
goodness_function=self.get_goodness_function(),
multiattach=False
multiattach=False,
backend_state='up'
))
data["pools"].append(single_pool)
data["shared_targets"] = False


@@ -0,0 +1,20 @@
====================================
Report backend state in service list
====================================

Currently, Cinder does not report the backend state to the service list:
operators only know that the cinder-volume process is up, but cannot tell
whether the backend storage device is healthy. Users can still create
volumes, which then fail over and over again. To make maintenance easier,
operators can query the storage device state via the service list and fix
problems more quickly. If the device state is *down*, volume creation will
fail.

To get the backend state, use the Block Storage API service list and run
this command:

.. code-block:: console

   $ openstack volume service list

The ``backend_state: up/down`` field is added to the response body of the
service list. This feature is supported since microversion 3.49.
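
The field can also be read straight from the REST API by sending the
microversion header; in this illustrative example, ``$TOKEN`` and
``$CINDER_ENDPOINT`` are placeholders for your credentials and Block
Storage v3 endpoint:

.. code-block:: console

   $ curl -s -H "X-Auth-Token: $TOKEN" \
          -H "OpenStack-API-Version: volume 3.49" \
          "$CINDER_ENDPOINT/os-services"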


@@ -45,6 +45,7 @@ Amazon EC2 Elastic Block Storage (EBS) offering.
blockstorage-volume-backups.rst
blockstorage-volume-migration.rst
blockstorage-volume-number-weigher.rst
blockstorage-report-backend-state.rst
.. _`Storage Decisions`: https://docs.openstack.org/arch-design/design-storage/design-storage-arch.html
.. _`OpenStack Operations Guide`: https://wiki.openstack.org/wiki/OpsGuide


@@ -0,0 +1,8 @@
---
features:
  - |
    Added ``backend_state: up/down`` to the response body of the service
    list when the context is admin. This helps operators or a cloud
    management system see the backend device state for every service.
    If the device state is *down*, the storage device has a problem;
    this extra information helps locate faults quickly.