Vmware: Revert the volume stats revert.

This patch reverts the removal of the collection of volume stats
from the vCenter.   Of course there is a performance hit when
collecting volume stats against a backend as compared to doing
nothing.  The problem with not having any stats collection is that
the scheduler can't schedule properly against this driver, especially
when there are multiple of them deployed and heterogeniously against
other backends.   This revert fixes existing deployments against
this backend.

This patch includes a new vmware driver config optiont that allows one
to enable the stats collection, so that a deployment with this driver
can actually work with the scheduler.  When a driver doesn't report
any statistics, the scheduler has no way to determine the capacity
utilization of the driver and will always assume it can take a
provisioning request. This is especially problematic in a
heterogeniously deployed environment, where other drivers do report
valid stats.

This reverts commit f3ee3dbf4e.

Change-Id: Ifdd9b89e00de692a7a9e121ed421b921aa8bfc70
Closes-Bug: 1859864
This commit is contained in:
hemna 2020-01-15 14:06:46 -05:00 committed by Hemna
parent 10bbfcc78e
commit d987062d15
5 changed files with 268 additions and 42 deletions

View File

@ -44,6 +44,7 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase):
IP = 'localhost' IP = 'localhost'
PORT = 2321 PORT = 2321
IMG_TX_TIMEOUT = 10 IMG_TX_TIMEOUT = 10
RESERVED_PERCENTAGE = 0
VMDK_DRIVER = vmdk.VMwareVcVmdkDriver VMDK_DRIVER = vmdk.VMwareVcVmdkDriver
FCD_DRIVER = fcd.VMwareVStorageObjectDriver FCD_DRIVER = fcd.VMwareVStorageObjectDriver
VC_VERSION = "6.7.0" VC_VERSION = "6.7.0"
@ -64,6 +65,7 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase):
self._config.vmware_host_ip = self.IP self._config.vmware_host_ip = self.IP
self._config.vmware_host_port = self.PORT self._config.vmware_host_port = self.PORT
self._config.vmware_image_transfer_timeout_secs = self.IMG_TX_TIMEOUT self._config.vmware_image_transfer_timeout_secs = self.IMG_TX_TIMEOUT
self._config.reserved_percentage = self.RESERVED_PERCENTAGE
self._driver = fcd.VMwareVStorageObjectDriver( self._driver = fcd.VMwareVStorageObjectDriver(
configuration=self._config) configuration=self._config)
self._driver._vc_version = self.VC_VERSION self._driver._vc_version = self.VC_VERSION
@ -82,15 +84,39 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase):
self.assertTrue(self._driver._use_fcd_snapshot) self.assertTrue(self._driver._use_fcd_snapshot)
self.assertTrue(self._driver._storage_policy_enabled) self.assertTrue(self._driver._storage_policy_enabled)
def test_get_volume_stats(self): @mock.patch.object(VMDK_DRIVER, 'volumeops')
@mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries')
def test_get_volume_stats(self, _get_datastore_summaries, vops):
FREE_GB = 7
TOTAL_GB = 11
class ObjMock(object):
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
_get_datastore_summaries.return_value = \
ObjMock(objects= [
ObjMock(propSet = [
ObjMock(name = "host",
val = ObjMock(DatastoreHostMount = [])),
ObjMock(name = "summary",
val = ObjMock(freeSpace = FREE_GB * units.Gi,
capacity = TOTAL_GB * units.Gi,
accessible = True))
])
])
vops._in_maintenance.return_value = False
stats = self._driver.get_volume_stats() stats = self._driver.get_volume_stats()
self.assertEqual('VMware', stats['vendor_name']) self.assertEqual('VMware', stats['vendor_name'])
self.assertEqual(self._driver.VERSION, stats['driver_version']) self.assertEqual(self._driver.VERSION, stats['driver_version'])
self.assertEqual(self._driver.STORAGE_TYPE, stats['storage_protocol']) self.assertEqual(self._driver.STORAGE_TYPE, stats['storage_protocol'])
self.assertEqual(0, stats['reserved_percentage']) self.assertEqual(self.RESERVED_PERCENTAGE,
self.assertEqual('unknown', stats['total_capacity_gb']) stats['reserved_percentage'])
self.assertEqual('unknown', stats['free_capacity_gb']) self.assertEqual(TOTAL_GB, stats['total_capacity_gb'])
self.assertEqual(FREE_GB, stats['free_capacity_gb'])
def _create_volume_dict(self, def _create_volume_dict(self,
vol_id=VOL_ID, vol_id=VOL_ID,

View File

@ -34,15 +34,29 @@ from cinder.tests.unit import fake_constants
from cinder.tests.unit import fake_snapshot from cinder.tests.unit import fake_snapshot
from cinder.tests.unit import fake_volume from cinder.tests.unit import fake_volume
from cinder.tests.unit import utils as test_utils from cinder.tests.unit import utils as test_utils
from cinder.volume import configuration
from cinder.volume.drivers.vmware import datastore as hub from cinder.volume.drivers.vmware import datastore as hub
from cinder.volume.drivers.vmware import exceptions as vmdk_exceptions from cinder.volume.drivers.vmware import exceptions as vmdk_exceptions
from cinder.volume.drivers.vmware import vmdk from cinder.volume.drivers.vmware import vmdk
from cinder.volume.drivers.vmware import volumeops from cinder.volume.drivers.vmware import volumeops
class MockConfiguration(object):
def __init__(self, **kwargs):
for kw in kwargs:
setattr(self, kw, kwargs[kw])
def safe_get(self, name):
return getattr(self, name) if hasattr(self, name) else None
def append_config_values(self, opts):
for opt in opts:
if not hasattr(self, opt.name):
setattr(self, opt.name, opt.default or None)
# TODO(vbala) Split test methods handling multiple cases into multiple methods, # TODO(vbala) Split test methods handling multiple cases into multiple methods,
# each handling a specific case. # each handling a specific case.
@ddt.ddt @ddt.ddt
class VMwareVcVmdkDriverTestCase(test.TestCase): class VMwareVcVmdkDriverTestCase(test.TestCase):
"""Unit tests for VMwareVcVmdkDriver.""" """Unit tests for VMwareVcVmdkDriver."""
@ -80,27 +94,29 @@ class VMwareVcVmdkDriverTestCase(test.TestCase):
def setUp(self): def setUp(self):
super(VMwareVcVmdkDriverTestCase, self).setUp() super(VMwareVcVmdkDriverTestCase, self).setUp()
self._config = mock.Mock(spec=configuration.Configuration) self._config = MockConfiguration(
self._config.vmware_host_ip = self.IP vmware_host_ip=self.IP,
self._config.vmware_host_port = self.PORT vmware_host_port=self.PORT,
self._config.vmware_host_username = self.USERNAME vmware_host_username=self.USERNAME,
self._config.vmware_host_password = self.PASSWORD vmware_host_password=self.PASSWORD,
self._config.vmware_wsdl_location = None vmware_wsdl_location=None,
self._config.vmware_volume_folder = self.VOLUME_FOLDER vmware_volume_folder=self.VOLUME_FOLDER,
self._config.vmware_api_retry_count = self.API_RETRY_COUNT vmware_api_retry_count=self.API_RETRY_COUNT,
self._config.vmware_task_poll_interval = self.TASK_POLL_INTERVAL vmware_task_poll_interval=self.TASK_POLL_INTERVAL,
self._config.vmware_image_transfer_timeout_secs = self.IMG_TX_TIMEOUT vmware_image_transfer_timeout_secs=self.IMG_TX_TIMEOUT,
self._config.vmware_max_objects_retrieval = self.MAX_OBJECTS vmware_max_objects_retrieval=self.MAX_OBJECTS,
self._config.vmware_tmp_dir = self.TMP_DIR vmware_tmp_dir=self.TMP_DIR,
self._config.vmware_ca_file = self.CA_FILE vmware_ca_file=self.CA_FILE,
self._config.vmware_insecure = False vmware_insecure=False,
self._config.vmware_cluster_name = self.CLUSTERS vmware_cluster_name=self.CLUSTERS,
self._config.vmware_host_version = self.DEFAULT_VC_VERSION vmware_host_version=self.DEFAULT_VC_VERSION,
self._config.vmware_connection_pool_size = self.POOL_SIZE vmware_connection_pool_size=self.POOL_SIZE,
self._config.vmware_adapter_type = self.ADAPTER_TYPE vmware_adapter_type=self.ADAPTER_TYPE,
self._config.vmware_snapshot_format = self.SNAPSHOT_FORMAT vmware_snapshot_format=self.SNAPSHOT_FORMAT,
self._config.vmware_lazy_create = True vmware_lazy_create=True,
self._config.vmware_datastore_regex = None vmware_datastore_regex=None,
reserved_percentage=0
)
self._db = mock.Mock() self._db = mock.Mock()
self._driver = vmdk.VMwareVcVmdkDriver(configuration=self._config, self._driver = vmdk.VMwareVcVmdkDriver(configuration=self._config,
@ -109,15 +125,57 @@ class VMwareVcVmdkDriverTestCase(test.TestCase):
self._context = context.get_admin_context() self._context = context.get_admin_context()
self.updated_at = timeutils.utcnow() self.updated_at = timeutils.utcnow()
def test_get_volume_stats(self): @mock.patch.object(VMDK_DRIVER, 'volumeops')
@mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries')
def test_get_volume_stats(self, _get_datastore_summaries, vops):
FREE_GB = 7
TOTAL_GB = 11
class ObjMock(object):
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
_get_datastore_summaries.return_value = (ObjMock(objects= [
ObjMock(propSet = [
ObjMock(name = "host",
val = ObjMock(DatastoreHostMount = [])),
ObjMock(name = "summary",
val = ObjMock(freeSpace = FREE_GB * units.Gi,
capacity = TOTAL_GB * units.Gi,
accessible = True))
])
]))
vops._in_maintenance.return_value = False
# Enable volume stats collection from backend
self._driver.configuration.vmware_enable_volume_stats = True
stats = self._driver.get_volume_stats() stats = self._driver.get_volume_stats()
self.assertEqual('VMware', stats['vendor_name']) self.assertEqual('VMware', stats['vendor_name'])
self.assertEqual(self._driver.VERSION, stats['driver_version']) self.assertEqual(self._driver.VERSION, stats['driver_version'])
self.assertEqual('vmdk', stats['storage_protocol']) self.assertEqual('vmdk', stats['storage_protocol'])
self.assertEqual(0, stats['reserved_percentage']) self.assertEqual(self._config.reserved_percentage,
self.assertEqual('unknown', stats['total_capacity_gb']) stats['reserved_percentage'])
self.assertEqual('unknown', stats['free_capacity_gb']) self.assertEqual(TOTAL_GB, stats['total_capacity_gb'])
self.assertEqual(FREE_GB, stats['free_capacity_gb'])
self.assertFalse(stats['shared_targets'])
def test_test_volume_stats_disabled(self):
RESERVED_PERCENTAGE = 0
TOTAL_GB = 'unknown'
FREE_GB = 'unknown'
self._driver.configuration.vmware_enable_volume_stats = False
stats = self._driver.get_volume_stats()
self.assertEqual('VMware', stats['vendor_name'])
self.assertEqual(self._driver.VERSION, stats['driver_version'])
self.assertEqual('vmdk', stats['storage_protocol'])
self.assertEqual(RESERVED_PERCENTAGE,
stats['reserved_percentage'])
self.assertEqual(TOTAL_GB, stats['total_capacity_gb'])
self.assertEqual(FREE_GB, stats['free_capacity_gb'])
self.assertFalse(stats['shared_targets']) self.assertFalse(stats['shared_targets'])
def _create_volume_dict(self, def _create_volume_dict(self,

View File

@ -131,10 +131,8 @@ vmdk_opts = [
help='Name of a vCenter compute cluster where volumes ' help='Name of a vCenter compute cluster where volumes '
'should be created.'), 'should be created.'),
cfg.MultiStrOpt('vmware_storage_profile', cfg.MultiStrOpt('vmware_storage_profile',
help='Names of storage profiles to be monitored.', help='Names of storage profiles to be monitored. Only '
deprecated_for_removal=True, 'used when vmware_enable_volume_stats is True.'),
deprecated_reason='Setting this option results in '
'significant performance degradation.'),
cfg.IntOpt('vmware_connection_pool_size', cfg.IntOpt('vmware_connection_pool_size',
default=10, default=10,
help='Maximum number of connections in http connection pool.'), help='Maximum number of connections in http connection pool.'),
@ -158,7 +156,13 @@ vmdk_opts = [
'attached, uploaded to image service or during backup.'), 'attached, uploaded to image service or during backup.'),
cfg.StrOpt('vmware_datastore_regex', cfg.StrOpt('vmware_datastore_regex',
help='Regular expression pattern to match the name of ' help='Regular expression pattern to match the name of '
'datastores where backend volumes are created.') 'datastores where backend volumes are created.'),
cfg.BoolOpt('vmware_enable_volume_stats',
default=False,
help='If true, this enables the fetching of the volume stats '
'from the backend. This has potential performance '
'issues at scale. When False, the driver will not '
'collect ANY stats about the backend.')
] ]
CONF = cfg.CONF CONF = cfg.CONF
@ -271,7 +275,10 @@ class VMwareVcVmdkDriver(driver.VolumeDriver):
# 3.4.0 - added NFS41 as a supported datastore type # 3.4.0 - added NFS41 as a supported datastore type
# 3.4.1 - volume capacity stats implemented # 3.4.1 - volume capacity stats implemented
# 3.4.2 - deprecated option vmware_storage_profile # 3.4.2 - deprecated option vmware_storage_profile
VERSION = '3.4.2' # 3.4.3 - un-deprecated option vmware_storage_profile and added new
# option vmware_enable_volume_stats to optionally enable
# real get_volume_stats for proper scheduling of this driver.
VERSION = '3.4.3'
# ThirdPartySystems wiki page # ThirdPartySystems wiki page
CI_WIKI_NAME = "VMware_CI" CI_WIKI_NAME = "VMware_CI"
@ -326,7 +333,18 @@ class VMwareVcVmdkDriver(driver.VolumeDriver):
:param refresh: Whether to get refreshed information :param refresh: Whether to get refreshed information
""" """
if not self._stats or refresh:
if self.configuration.safe_get('vmware_enable_volume_stats'):
self._stats = self._get_volume_stats(refresh)
else:
self._stats = self._get_fake_stats(refresh)
return self._stats
def _get_fake_stats(self, refresh=False):
"""Provide fake stats to the scheduler.
:param refresh: Whether to get refreshed information
"""
if not self._stats: if not self._stats:
backend_name = self.configuration.safe_get('volume_backend_name') backend_name = self.configuration.safe_get('volume_backend_name')
if not backend_name: if not backend_name:
@ -342,6 +360,113 @@ class VMwareVcVmdkDriver(driver.VolumeDriver):
self._stats = data self._stats = data
return self._stats return self._stats
def _get_volume_stats(self, refresh=False):
"""Fetch the stats about the backend.
This can be slow at scale, but allows
properly provisioning scheduling.
"""
backend_name = self.configuration.safe_get('volume_backend_name')
if not backend_name:
backend_name = self.__class__.__name__
data = {'volume_backend_name': backend_name,
'vendor_name': 'VMware',
'driver_version': self.VERSION,
'storage_protocol': 'vmdk',
'reserved_percentage': self.configuration.reserved_percentage,
'shared_targets': False}
ds_summaries = self._get_datastore_summaries()
available_hosts = self._get_hosts(self._clusters)
global_capacity = 0
global_free = 0
while True:
for ds in ds_summaries.objects:
ds_props = self._get_object_properties(ds)
summary = ds_props['summary']
if self._is_datastore_accessible(summary,
ds_props['host'],
available_hosts):
global_capacity += summary.capacity
global_free += summary.freeSpace
if getattr(ds_summaries, 'token', None):
ds_summaries = self.volumeops.continue_retrieval(ds_summaries)
else:
break
data['total_capacity_gb'] = round(global_capacity / units.Gi)
data['free_capacity_gb'] = round(global_free / units.Gi)
return data
def _get_datastore_summaries(self):
client_factory = self.session.vim.client.factory
object_specs = []
if (self._storage_policy_enabled
and self.configuration.vmware_storage_profile):
# Get all available storage profiles on the vCenter and extract the
# IDs of those that we want to observe
profiles_ids = []
for profile in pbm.get_all_profiles(self.session):
if profile.name in self.configuration.vmware_storage_profile:
profiles_ids.append(profile.profileId)
# Get all matching Datastores for each profile
datastores = {}
for profile_id in profiles_ids:
for pbm_hub in pbm.filter_hubs_by_profile(self.session,
None,
profile_id):
if pbm_hub.hubType != "Datastore":
# We are not interested in Datastore Clusters for now
continue
if pbm_hub.hubId not in datastores:
# Reconstruct a managed object reference to datastore
datastores[pbm_hub.hubId] = vim_util.get_moref(
pbm_hub.hubId, "Datastore")
# Build property collector object specs out of them
for datastore_ref in datastores.values():
object_specs.append(
vim_util.build_object_spec(client_factory,
datastore_ref,
[]))
else:
# Build a catch-all object spec that would reach all datastores
object_specs.append(
vim_util.build_object_spec(
client_factory,
self.session.vim.service_content.rootFolder,
[vim_util.build_recursive_traversal_spec(client_factory)]))
prop_spec = vim_util.build_property_spec(client_factory, 'Datastore',
['summary', 'host'])
filter_spec = vim_util.build_property_filter_spec(client_factory,
prop_spec,
object_specs)
options = client_factory.create('ns0:RetrieveOptions')
options.maxObjects = self.configuration.vmware_max_objects_retrieval
result = self.session.vim.RetrievePropertiesEx(
self.session.vim.service_content.propertyCollector,
specSet=[filter_spec],
options=options)
return result
def _get_object_properties(self, obj_content):
props = {}
if hasattr(obj_content, 'propSet'):
prop_set = obj_content.propSet
if prop_set:
props = {prop.name: prop.val for prop in prop_set}
return props
def _is_datastore_accessible(self, ds_summary, ds_host_mounts,
available_hosts):
# available_hosts empty => vmware_cluster_name not specified => don't
# filter by hosts
cluster_access_to_ds = not available_hosts
for host_mount in ds_host_mounts.DatastoreHostMount:
for avlbl_host in available_hosts:
if avlbl_host.value == host_mount.key.value:
cluster_access_to_ds = True
return (ds_summary.accessible
and not self.volumeops._in_maintenance(ds_summary)
and cluster_access_to_ds)
def _verify_volume_creation(self, volume): def _verify_volume_creation(self, volume):
"""Verify that the volume can be created. """Verify that the volume can be created.

View File

@ -0,0 +1,23 @@
---
upgrade:
- |
VMware vmdk driver: The vmware vmdk driver had its get_volume_stats
removed in a previous release due to a potential performance hit of 20%
at a high load. The problem with reporting ``unknown`` back to the
scheduler, is that it effectively removes cinder's ability to properly
schedule based on capacity utilization. When this driver is enabled in a
heterogenous environment without properly reporting utilization
statistics, the scheduler's capacity filter will always allow this driver
to service a provisioning request. Without reporting the backend stats,
the capacity filter also can't determine the reserved_percentage as well
as the max_over_subscription_ratio. To enable the collection of stats
set ``vmware_enable_volume_stats`` to True in the driver section of
cinder.conf. The default setting is False. Keep in mind that there may
be a degradation in performance on the vcenter when enabling this setting.
fixes:
- |
VMware vmdk driver: The collection of volume stats, which had been
disabled, may now be turned on by using the ``vmware_enable_volume_stats``
configuration option. The default for this option is False (no
stats collection). Be aware that enabling volume stats may cause
performance issues under high load.

View File

@ -1,6 +0,0 @@
---
deprecations:
- |
The config option ``vmware_storage_profile`` is now deprecated
and ignored. Setting this option results in performance degradation
of the controller and put lot of load on vCenter server.