Vmware: Revert the volume stats revert.
This patch reverts the removal of the collection of volume stats
from the vCenter. Of course there is a performance hit when
collecting volume stats against a backend as compared to doing
nothing. The problem with not having any stats collection is that
the scheduler can't schedule properly against this driver, especially
when there are multiple of them deployed heterogeneously against
other backends. This revert fixes existing deployments against
this backend.
This patch includes a new vmware driver config option that allows one
to enable the stats collection, so that a deployment with this driver
can actually work with the scheduler. When a driver doesn't report
any statistics, the scheduler has no way to determine the capacity
utilization of the driver and will always assume it can take a
provisioning request. This is especially problematic in a
heterogeneously deployed environment, where other drivers do report
valid stats.
This reverts commit f3ee3dbf4e
.
Change-Id: Ifdd9b89e00de692a7a9e121ed421b921aa8bfc70
Closes-Bug: 1859864
This commit is contained in:
parent
10bbfcc78e
commit
d987062d15
@ -44,6 +44,7 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase):
|
||||
IP = 'localhost'
|
||||
PORT = 2321
|
||||
IMG_TX_TIMEOUT = 10
|
||||
RESERVED_PERCENTAGE = 0
|
||||
VMDK_DRIVER = vmdk.VMwareVcVmdkDriver
|
||||
FCD_DRIVER = fcd.VMwareVStorageObjectDriver
|
||||
VC_VERSION = "6.7.0"
|
||||
@ -64,6 +65,7 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase):
|
||||
self._config.vmware_host_ip = self.IP
|
||||
self._config.vmware_host_port = self.PORT
|
||||
self._config.vmware_image_transfer_timeout_secs = self.IMG_TX_TIMEOUT
|
||||
self._config.reserved_percentage = self.RESERVED_PERCENTAGE
|
||||
self._driver = fcd.VMwareVStorageObjectDriver(
|
||||
configuration=self._config)
|
||||
self._driver._vc_version = self.VC_VERSION
|
||||
@ -82,15 +84,39 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase):
|
||||
self.assertTrue(self._driver._use_fcd_snapshot)
|
||||
self.assertTrue(self._driver._storage_policy_enabled)
|
||||
|
||||
def test_get_volume_stats(self):
|
||||
@mock.patch.object(VMDK_DRIVER, 'volumeops')
|
||||
@mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries')
|
||||
def test_get_volume_stats(self, _get_datastore_summaries, vops):
|
||||
FREE_GB = 7
|
||||
TOTAL_GB = 11
|
||||
|
||||
class ObjMock(object):
|
||||
def __init__(self, **kwargs):
|
||||
self.__dict__.update(kwargs)
|
||||
|
||||
_get_datastore_summaries.return_value = \
|
||||
ObjMock(objects= [
|
||||
ObjMock(propSet = [
|
||||
ObjMock(name = "host",
|
||||
val = ObjMock(DatastoreHostMount = [])),
|
||||
ObjMock(name = "summary",
|
||||
val = ObjMock(freeSpace = FREE_GB * units.Gi,
|
||||
capacity = TOTAL_GB * units.Gi,
|
||||
accessible = True))
|
||||
])
|
||||
])
|
||||
|
||||
vops._in_maintenance.return_value = False
|
||||
|
||||
stats = self._driver.get_volume_stats()
|
||||
|
||||
self.assertEqual('VMware', stats['vendor_name'])
|
||||
self.assertEqual(self._driver.VERSION, stats['driver_version'])
|
||||
self.assertEqual(self._driver.STORAGE_TYPE, stats['storage_protocol'])
|
||||
self.assertEqual(0, stats['reserved_percentage'])
|
||||
self.assertEqual('unknown', stats['total_capacity_gb'])
|
||||
self.assertEqual('unknown', stats['free_capacity_gb'])
|
||||
self.assertEqual(self.RESERVED_PERCENTAGE,
|
||||
stats['reserved_percentage'])
|
||||
self.assertEqual(TOTAL_GB, stats['total_capacity_gb'])
|
||||
self.assertEqual(FREE_GB, stats['free_capacity_gb'])
|
||||
|
||||
def _create_volume_dict(self,
|
||||
vol_id=VOL_ID,
|
||||
|
@ -34,15 +34,29 @@ from cinder.tests.unit import fake_constants
|
||||
from cinder.tests.unit import fake_snapshot
|
||||
from cinder.tests.unit import fake_volume
|
||||
from cinder.tests.unit import utils as test_utils
|
||||
from cinder.volume import configuration
|
||||
from cinder.volume.drivers.vmware import datastore as hub
|
||||
from cinder.volume.drivers.vmware import exceptions as vmdk_exceptions
|
||||
from cinder.volume.drivers.vmware import vmdk
|
||||
from cinder.volume.drivers.vmware import volumeops
|
||||
|
||||
|
||||
class MockConfiguration(object):
|
||||
def __init__(self, **kwargs):
|
||||
for kw in kwargs:
|
||||
setattr(self, kw, kwargs[kw])
|
||||
|
||||
def safe_get(self, name):
|
||||
return getattr(self, name) if hasattr(self, name) else None
|
||||
|
||||
def append_config_values(self, opts):
|
||||
for opt in opts:
|
||||
if not hasattr(self, opt.name):
|
||||
setattr(self, opt.name, opt.default or None)
|
||||
|
||||
# TODO(vbala) Split test methods handling multiple cases into multiple methods,
|
||||
# each handling a specific case.
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class VMwareVcVmdkDriverTestCase(test.TestCase):
|
||||
"""Unit tests for VMwareVcVmdkDriver."""
|
||||
@ -80,27 +94,29 @@ class VMwareVcVmdkDriverTestCase(test.TestCase):
|
||||
def setUp(self):
|
||||
super(VMwareVcVmdkDriverTestCase, self).setUp()
|
||||
|
||||
self._config = mock.Mock(spec=configuration.Configuration)
|
||||
self._config.vmware_host_ip = self.IP
|
||||
self._config.vmware_host_port = self.PORT
|
||||
self._config.vmware_host_username = self.USERNAME
|
||||
self._config.vmware_host_password = self.PASSWORD
|
||||
self._config.vmware_wsdl_location = None
|
||||
self._config.vmware_volume_folder = self.VOLUME_FOLDER
|
||||
self._config.vmware_api_retry_count = self.API_RETRY_COUNT
|
||||
self._config.vmware_task_poll_interval = self.TASK_POLL_INTERVAL
|
||||
self._config.vmware_image_transfer_timeout_secs = self.IMG_TX_TIMEOUT
|
||||
self._config.vmware_max_objects_retrieval = self.MAX_OBJECTS
|
||||
self._config.vmware_tmp_dir = self.TMP_DIR
|
||||
self._config.vmware_ca_file = self.CA_FILE
|
||||
self._config.vmware_insecure = False
|
||||
self._config.vmware_cluster_name = self.CLUSTERS
|
||||
self._config.vmware_host_version = self.DEFAULT_VC_VERSION
|
||||
self._config.vmware_connection_pool_size = self.POOL_SIZE
|
||||
self._config.vmware_adapter_type = self.ADAPTER_TYPE
|
||||
self._config.vmware_snapshot_format = self.SNAPSHOT_FORMAT
|
||||
self._config.vmware_lazy_create = True
|
||||
self._config.vmware_datastore_regex = None
|
||||
self._config = MockConfiguration(
|
||||
vmware_host_ip=self.IP,
|
||||
vmware_host_port=self.PORT,
|
||||
vmware_host_username=self.USERNAME,
|
||||
vmware_host_password=self.PASSWORD,
|
||||
vmware_wsdl_location=None,
|
||||
vmware_volume_folder=self.VOLUME_FOLDER,
|
||||
vmware_api_retry_count=self.API_RETRY_COUNT,
|
||||
vmware_task_poll_interval=self.TASK_POLL_INTERVAL,
|
||||
vmware_image_transfer_timeout_secs=self.IMG_TX_TIMEOUT,
|
||||
vmware_max_objects_retrieval=self.MAX_OBJECTS,
|
||||
vmware_tmp_dir=self.TMP_DIR,
|
||||
vmware_ca_file=self.CA_FILE,
|
||||
vmware_insecure=False,
|
||||
vmware_cluster_name=self.CLUSTERS,
|
||||
vmware_host_version=self.DEFAULT_VC_VERSION,
|
||||
vmware_connection_pool_size=self.POOL_SIZE,
|
||||
vmware_adapter_type=self.ADAPTER_TYPE,
|
||||
vmware_snapshot_format=self.SNAPSHOT_FORMAT,
|
||||
vmware_lazy_create=True,
|
||||
vmware_datastore_regex=None,
|
||||
reserved_percentage=0
|
||||
)
|
||||
|
||||
self._db = mock.Mock()
|
||||
self._driver = vmdk.VMwareVcVmdkDriver(configuration=self._config,
|
||||
@ -109,15 +125,57 @@ class VMwareVcVmdkDriverTestCase(test.TestCase):
|
||||
self._context = context.get_admin_context()
|
||||
self.updated_at = timeutils.utcnow()
|
||||
|
||||
def test_get_volume_stats(self):
|
||||
@mock.patch.object(VMDK_DRIVER, 'volumeops')
|
||||
@mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries')
|
||||
def test_get_volume_stats(self, _get_datastore_summaries, vops):
|
||||
FREE_GB = 7
|
||||
TOTAL_GB = 11
|
||||
|
||||
class ObjMock(object):
|
||||
def __init__(self, **kwargs):
|
||||
self.__dict__.update(kwargs)
|
||||
|
||||
_get_datastore_summaries.return_value = (ObjMock(objects= [
|
||||
ObjMock(propSet = [
|
||||
ObjMock(name = "host",
|
||||
val = ObjMock(DatastoreHostMount = [])),
|
||||
ObjMock(name = "summary",
|
||||
val = ObjMock(freeSpace = FREE_GB * units.Gi,
|
||||
capacity = TOTAL_GB * units.Gi,
|
||||
accessible = True))
|
||||
])
|
||||
]))
|
||||
|
||||
vops._in_maintenance.return_value = False
|
||||
# Enable volume stats collection from backend
|
||||
self._driver.configuration.vmware_enable_volume_stats = True
|
||||
|
||||
stats = self._driver.get_volume_stats()
|
||||
|
||||
self.assertEqual('VMware', stats['vendor_name'])
|
||||
self.assertEqual(self._driver.VERSION, stats['driver_version'])
|
||||
self.assertEqual('vmdk', stats['storage_protocol'])
|
||||
self.assertEqual(0, stats['reserved_percentage'])
|
||||
self.assertEqual('unknown', stats['total_capacity_gb'])
|
||||
self.assertEqual('unknown', stats['free_capacity_gb'])
|
||||
self.assertEqual(self._config.reserved_percentage,
|
||||
stats['reserved_percentage'])
|
||||
self.assertEqual(TOTAL_GB, stats['total_capacity_gb'])
|
||||
self.assertEqual(FREE_GB, stats['free_capacity_gb'])
|
||||
self.assertFalse(stats['shared_targets'])
|
||||
|
||||
def test_test_volume_stats_disabled(self):
|
||||
RESERVED_PERCENTAGE = 0
|
||||
TOTAL_GB = 'unknown'
|
||||
FREE_GB = 'unknown'
|
||||
|
||||
self._driver.configuration.vmware_enable_volume_stats = False
|
||||
stats = self._driver.get_volume_stats()
|
||||
|
||||
self.assertEqual('VMware', stats['vendor_name'])
|
||||
self.assertEqual(self._driver.VERSION, stats['driver_version'])
|
||||
self.assertEqual('vmdk', stats['storage_protocol'])
|
||||
self.assertEqual(RESERVED_PERCENTAGE,
|
||||
stats['reserved_percentage'])
|
||||
self.assertEqual(TOTAL_GB, stats['total_capacity_gb'])
|
||||
self.assertEqual(FREE_GB, stats['free_capacity_gb'])
|
||||
self.assertFalse(stats['shared_targets'])
|
||||
|
||||
def _create_volume_dict(self,
|
||||
|
@ -131,10 +131,8 @@ vmdk_opts = [
|
||||
help='Name of a vCenter compute cluster where volumes '
|
||||
'should be created.'),
|
||||
cfg.MultiStrOpt('vmware_storage_profile',
|
||||
help='Names of storage profiles to be monitored.',
|
||||
deprecated_for_removal=True,
|
||||
deprecated_reason='Setting this option results in '
|
||||
'significant performance degradation.'),
|
||||
help='Names of storage profiles to be monitored. Only '
|
||||
'used when vmware_enable_volume_stats is True.'),
|
||||
cfg.IntOpt('vmware_connection_pool_size',
|
||||
default=10,
|
||||
help='Maximum number of connections in http connection pool.'),
|
||||
@ -158,7 +156,13 @@ vmdk_opts = [
|
||||
'attached, uploaded to image service or during backup.'),
|
||||
cfg.StrOpt('vmware_datastore_regex',
|
||||
help='Regular expression pattern to match the name of '
|
||||
'datastores where backend volumes are created.')
|
||||
'datastores where backend volumes are created.'),
|
||||
cfg.BoolOpt('vmware_enable_volume_stats',
|
||||
default=False,
|
||||
help='If true, this enables the fetching of the volume stats '
|
||||
'from the backend. This has potential performance '
|
||||
'issues at scale. When False, the driver will not '
|
||||
'collect ANY stats about the backend.')
|
||||
]
|
||||
|
||||
CONF = cfg.CONF
|
||||
@ -271,7 +275,10 @@ class VMwareVcVmdkDriver(driver.VolumeDriver):
|
||||
# 3.4.0 - added NFS41 as a supported datastore type
|
||||
# 3.4.1 - volume capacity stats implemented
|
||||
# 3.4.2 - deprecated option vmware_storage_profile
|
||||
VERSION = '3.4.2'
|
||||
# 3.4.3 - un-deprecated option vmware_storage_profile and added new
|
||||
# option vmware_enable_volume_stats to optionally enable
|
||||
# real get_volume_stats for proper scheduling of this driver.
|
||||
VERSION = '3.4.3'
|
||||
|
||||
# ThirdPartySystems wiki page
|
||||
CI_WIKI_NAME = "VMware_CI"
|
||||
@ -326,7 +333,18 @@ class VMwareVcVmdkDriver(driver.VolumeDriver):
|
||||
|
||||
:param refresh: Whether to get refreshed information
|
||||
"""
|
||||
if not self._stats or refresh:
|
||||
if self.configuration.safe_get('vmware_enable_volume_stats'):
|
||||
self._stats = self._get_volume_stats(refresh)
|
||||
else:
|
||||
self._stats = self._get_fake_stats(refresh)
|
||||
return self._stats
|
||||
|
||||
def _get_fake_stats(self, refresh=False):
|
||||
"""Provide fake stats to the scheduler.
|
||||
|
||||
:param refresh: Whether to get refreshed information
|
||||
"""
|
||||
if not self._stats:
|
||||
backend_name = self.configuration.safe_get('volume_backend_name')
|
||||
if not backend_name:
|
||||
@ -342,6 +360,113 @@ class VMwareVcVmdkDriver(driver.VolumeDriver):
|
||||
self._stats = data
|
||||
return self._stats
|
||||
|
||||
def _get_volume_stats(self, refresh=False):
|
||||
"""Fetch the stats about the backend.
|
||||
|
||||
This can be slow at scale, but allows
|
||||
properly provisioning scheduling.
|
||||
"""
|
||||
backend_name = self.configuration.safe_get('volume_backend_name')
|
||||
if not backend_name:
|
||||
backend_name = self.__class__.__name__
|
||||
data = {'volume_backend_name': backend_name,
|
||||
'vendor_name': 'VMware',
|
||||
'driver_version': self.VERSION,
|
||||
'storage_protocol': 'vmdk',
|
||||
'reserved_percentage': self.configuration.reserved_percentage,
|
||||
'shared_targets': False}
|
||||
ds_summaries = self._get_datastore_summaries()
|
||||
available_hosts = self._get_hosts(self._clusters)
|
||||
global_capacity = 0
|
||||
global_free = 0
|
||||
while True:
|
||||
for ds in ds_summaries.objects:
|
||||
ds_props = self._get_object_properties(ds)
|
||||
summary = ds_props['summary']
|
||||
if self._is_datastore_accessible(summary,
|
||||
ds_props['host'],
|
||||
available_hosts):
|
||||
global_capacity += summary.capacity
|
||||
global_free += summary.freeSpace
|
||||
if getattr(ds_summaries, 'token', None):
|
||||
ds_summaries = self.volumeops.continue_retrieval(ds_summaries)
|
||||
else:
|
||||
break
|
||||
data['total_capacity_gb'] = round(global_capacity / units.Gi)
|
||||
data['free_capacity_gb'] = round(global_free / units.Gi)
|
||||
return data
|
||||
|
||||
def _get_datastore_summaries(self):
|
||||
client_factory = self.session.vim.client.factory
|
||||
object_specs = []
|
||||
if (self._storage_policy_enabled
|
||||
and self.configuration.vmware_storage_profile):
|
||||
# Get all available storage profiles on the vCenter and extract the
|
||||
# IDs of those that we want to observe
|
||||
profiles_ids = []
|
||||
for profile in pbm.get_all_profiles(self.session):
|
||||
if profile.name in self.configuration.vmware_storage_profile:
|
||||
profiles_ids.append(profile.profileId)
|
||||
# Get all matching Datastores for each profile
|
||||
datastores = {}
|
||||
for profile_id in profiles_ids:
|
||||
for pbm_hub in pbm.filter_hubs_by_profile(self.session,
|
||||
None,
|
||||
profile_id):
|
||||
if pbm_hub.hubType != "Datastore":
|
||||
# We are not interested in Datastore Clusters for now
|
||||
continue
|
||||
if pbm_hub.hubId not in datastores:
|
||||
# Reconstruct a managed object reference to datastore
|
||||
datastores[pbm_hub.hubId] = vim_util.get_moref(
|
||||
pbm_hub.hubId, "Datastore")
|
||||
# Build property collector object specs out of them
|
||||
for datastore_ref in datastores.values():
|
||||
object_specs.append(
|
||||
vim_util.build_object_spec(client_factory,
|
||||
datastore_ref,
|
||||
[]))
|
||||
else:
|
||||
# Build a catch-all object spec that would reach all datastores
|
||||
object_specs.append(
|
||||
vim_util.build_object_spec(
|
||||
client_factory,
|
||||
self.session.vim.service_content.rootFolder,
|
||||
[vim_util.build_recursive_traversal_spec(client_factory)]))
|
||||
prop_spec = vim_util.build_property_spec(client_factory, 'Datastore',
|
||||
['summary', 'host'])
|
||||
filter_spec = vim_util.build_property_filter_spec(client_factory,
|
||||
prop_spec,
|
||||
object_specs)
|
||||
options = client_factory.create('ns0:RetrieveOptions')
|
||||
options.maxObjects = self.configuration.vmware_max_objects_retrieval
|
||||
result = self.session.vim.RetrievePropertiesEx(
|
||||
self.session.vim.service_content.propertyCollector,
|
||||
specSet=[filter_spec],
|
||||
options=options)
|
||||
return result
|
||||
|
||||
def _get_object_properties(self, obj_content):
|
||||
props = {}
|
||||
if hasattr(obj_content, 'propSet'):
|
||||
prop_set = obj_content.propSet
|
||||
if prop_set:
|
||||
props = {prop.name: prop.val for prop in prop_set}
|
||||
return props
|
||||
|
||||
def _is_datastore_accessible(self, ds_summary, ds_host_mounts,
|
||||
available_hosts):
|
||||
# available_hosts empty => vmware_cluster_name not specified => don't
|
||||
# filter by hosts
|
||||
cluster_access_to_ds = not available_hosts
|
||||
for host_mount in ds_host_mounts.DatastoreHostMount:
|
||||
for avlbl_host in available_hosts:
|
||||
if avlbl_host.value == host_mount.key.value:
|
||||
cluster_access_to_ds = True
|
||||
return (ds_summary.accessible
|
||||
and not self.volumeops._in_maintenance(ds_summary)
|
||||
and cluster_access_to_ds)
|
||||
|
||||
def _verify_volume_creation(self, volume):
|
||||
"""Verify that the volume can be created.
|
||||
|
||||
|
@ -0,0 +1,23 @@
|
||||
---
|
||||
upgrade:
|
||||
- |
|
||||
VMware vmdk driver: The vmware vmdk driver had its get_volume_stats
|
||||
removed in a previous release due to a potential performance hit of 20%
|
||||
at a high load. The problem with reporting ``unknown`` back to the
|
||||
scheduler, is that it effectively removes cinder's ability to properly
|
||||
schedule based on capacity utilization. When this driver is enabled in a
|
||||
heterogeneous environment without properly reporting utilization
|
||||
statistics, the scheduler's capacity filter will always allow this driver
|
||||
to service a provisioning request. Without reporting the backend stats,
|
||||
the capacity filter also can't determine the reserved_percentage as well
|
||||
as the max_over_subscription_ratio. To enable the collection of stats
|
||||
set ``vmware_enable_volume_stats`` to True in the driver section of
|
||||
cinder.conf. The default setting is False. Keep in mind that there may
|
||||
be a degradation in performance on the vcenter when enabling this setting.
|
||||
fixes:
|
||||
- |
|
||||
VMware vmdk driver: The collection of volume stats, which had been
|
||||
disabled, may now be turned on by using the ``vmware_enable_volume_stats``
|
||||
configuration option. The default for this option is False (no
|
||||
stats collection). Be aware that enabling volume stats may cause
|
||||
performance issues under high load.
|
@ -1,6 +0,0 @@
|
||||
---
|
||||
deprecations:
|
||||
- |
|
||||
The config option ``vmware_storage_profile`` is now deprecated
|
||||
and ignored. Setting this option results in performance degradation
|
||||
of the controller and puts a lot of load on the vCenter server.
|
Loading…
Reference in New Issue
Block a user