From f32be69fc4ab836dce0cdf3ad5308f771b33f3f2 Mon Sep 17 00:00:00 2001
From: Victoria Martinez de la Cruz
Date: Thu, 7 Jul 2022 16:31:52 +0000
Subject: [PATCH] Refactor the Ceph NFS driver to use Cephadm NFS

A new NFSClusterProtocolHelper has been added to allow users to consume
NFS clusters deployed using cephadm. This presents many advantages,
since the operator no longer needs to maintain their own instances of
NFS Ganesha apart from the Ceph cluster.

For this, we now communicate with the ceph mgr using the nfs plugin.
Read more about this plugin at
https://docs.ceph.com/en/latest/cephfs/nfs/

Implements: bp/use-cephadm-nfs-ganesha
DocImpact

Change-Id: I1826f2970528928a31b32a664013380e38bbd7c9
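Editor's note: before diving into the diff, here is a minimal sketch of the
three mgr "nfs" plugin calls the refactored driver issues, expressed against
the rados_command() wrapper this patch extends. The cluster id "mycluster",
the subvolume path, and the client address are illustrative placeholders,
not values taken from this patch:

    import json

    from manila.share.drivers.cephfs import driver

    def sketch_nfs_plugin_calls(rados_client):
        cluster = {"nfs_cluster_id": "mycluster"}

        # 1) Read the NFS cluster layout; the helper derives export
        #    locations from the ingress VIP or the backend daemon IPs.
        info = json.loads(
            driver.rados_command(rados_client, "nfs cluster info", cluster))

        # 2) Create or update an export idempotently; the export spec
        #    travels to the mgr as a JSON input buffer (inbuf).
        export = {
            "nfs_cluster_id": "mycluster",
            "path": "/volumes/_nogroup/share1",
            "pseudo": "/volumes/_nogroup/share1",
            "fsal": {"name": "CEPH", "fs_name": "cephfs"},
            "clients": [{"access_type": "rw",
                         "addresses": ["10.0.0.1"],
                         "squash": "none"}],
        }
        driver.rados_command(rados_client, "nfs export apply", cluster,
                             inbuf=json.dumps(export).encode('utf-8'))

        # 3) Drop the export once no access rule references the share.
        driver.rados_command(rados_client, "nfs export rm",
                             {"nfs_cluster_id": "mycluster",
                              "pseudo_path": "/volumes/_nogroup/share1"})
        return info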
---
 manila/share/drivers/cephfs/driver.py         | 303 ++++++++++++++----
 manila/share/drivers/ganesha/manager.py       |   2 +-
 manila/share/drivers/ganesha/utils.py         |   6 +-
 .../tests/share/drivers/cephfs/test_driver.py | 157 ++++++++-
 ...-cephadm-nfs-ganesha-b9e071924de738fd.yaml |  22 ++
 5 files changed, 422 insertions(+), 68 deletions(-)
 create mode 100644 releasenotes/notes/use-cephadm-nfs-ganesha-b9e071924de738fd.yaml

diff --git a/manila/share/drivers/cephfs/driver.py b/manila/share/drivers/cephfs/driver.py
index 2b2be031f8..620b3703e6 100644
--- a/manila/share/drivers/cephfs/driver.py
+++ b/manila/share/drivers/cephfs/driver.py
@@ -140,9 +140,15 @@ cephfs_opts = [
                     "multiple filesystems in the cluster."),
 ]

+cephfsnfs_opts = [
+    cfg.StrOpt('cephfs_nfs_cluster_id',
+               help="The ID of the NFS cluster to use."),
+]
+
 CONF = cfg.CONF
 CONF.register_opts(cephfs_opts)
+CONF.register_opts(cephfsnfs_opts)


 class RadosError(Exception):
@@ -151,8 +157,8 @@ class RadosError(Exception):
     pass


-def rados_command(rados_client, prefix=None, args=None, json_obj=False,
-                  target=None):
+def rados_command(rados_client, prefix=None, args=None,
+                  json_obj=False, target=None, inbuf=None):
     """Safer wrapper for ceph_argparse.json_command

     Raises error exception instead of relying on caller to check return
@@ -177,17 +183,21 @@ def rados_command(rados_client, prefix=None, args=None, json_obj=False,
     argdict = args.copy()
     argdict['format'] = 'json'

+    if inbuf is None:
+        inbuf = b''
+
     LOG.debug("Invoking ceph_argparse.json_command - rados_client=%(cl)s, "
-              "target=%(tg)s, prefix='%(pf)s', argdict=%(ad)s, "
+              "target=%(tg)s, prefix='%(pf)s', argdict=%(ad)s, inbuf=%(ib)s, "
               "timeout=%(to)s.",
               {"cl": rados_client, "tg": target, "pf": prefix, "ad": argdict,
-               "to": RADOS_TIMEOUT})
+               "ib": inbuf, "to": RADOS_TIMEOUT})

     try:
         ret, outbuf, outs = json_command(rados_client,
                                          target=target,
                                          prefix=prefix,
                                          argdict=argdict,
+                                         inbuf=inbuf,
                                          timeout=RADOS_TIMEOUT)
         if ret != 0:
             raise rados.Error(outs, ret)
@@ -223,6 +233,7 @@ class CephFSDriver(driver.ExecuteMixin, driver.GaneshaMixin,
         self._volname = None
         self._ceph_mon_version = None
         self.configuration.append_config_values(cephfs_opts)
+        self.configuration.append_config_values(cephfsnfs_opts)

         try:
             int(self.configuration.cephfs_volume_mode, 8)
@@ -239,8 +250,14 @@ class CephFSDriver(driver.ExecuteMixin, driver.GaneshaMixin,
             protocol_helper_class = getattr(
                 sys.modules[__name__], 'NativeProtocolHelper')
         else:
-            protocol_helper_class = getattr(
-                sys.modules[__name__], 'NFSProtocolHelper')
+            # FIXME(vkmc) we intend to replace NFSProtocolHelper with
+            # NFSClusterProtocolHelper in the BB/CC release
+            if self.configuration.cephfs_nfs_cluster_id is None:
+                protocol_helper_class = getattr(
+                    sys.modules[__name__], 'NFSProtocolHelper')
+            else:
+                protocol_helper_class = getattr(
+                    sys.modules[__name__], 'NFSClusterProtocolHelper')

         self.setup_default_ceph_cmd_target()
@@ -952,7 +969,75 @@ class NativeProtocolHelper(ganesha.NASHelperBase):
         return [4]


-class NFSProtocolHelper(ganesha.GaneshaNASHelper2):
+class NFSProtocolHelperMixin():
+
+    def get_export_locations(self, share, subvolume_path):
+        export_locations = []
+
+        if not self.export_ips:
+            self.export_ips = self._get_export_ips()
+
+        for export_ip in self.export_ips:
+            # Try to escape the export ip. If it fails, it means that
+            # `cephfs_ganesha_server_ip` was possibly not set and the
+            # address used is a hostname
+            try:
+                server_address = driver_helpers.escaped_address(export_ip)
+            except ValueError:
+                server_address = export_ip
+
+            export_path = "{server_address}:{mount_path}".format(
+                server_address=server_address, mount_path=subvolume_path)
+
+            LOG.info("Calculated export path for share %(id)s: %(epath)s",
+                     {"id": share['id'], "epath": export_path})
+            export_location = {
+                'path': export_path,
+                'is_admin_only': False,
+                'metadata': {},
+            }
+            export_locations.append(export_location)
+        return export_locations
+
+    def _get_export_path(self, share):
+        """Callback to provide export path."""
+        argdict = {
+            "vol_name": self.volname,
+            "sub_name": share["id"]
+        }
+        if share["share_group_id"] is not None:
+            argdict.update({"group_name": share["share_group_id"]})
+
+        path = rados_command(
+            self.rados_client, "fs subvolume getpath", argdict)
+
+        return path
+
+    def _get_export_pseudo_path(self, share):
+        """Callback to provide pseudo path."""
+        return self._get_export_path(share)
+
+    def get_configured_ip_versions(self):
+        if not self.configured_ip_versions:
+            try:
+                if not self.export_ips:
+                    self.export_ips = self._get_export_ips()
+
+                for export_ip in self.export_ips:
+                    self.configured_ip_versions.add(
+                        ipaddress.ip_address(str(export_ip)).version)
+            except Exception:
+                # export_ips contained a hostname, safest thing is to
+                # claim support for IPv4 and IPv6 address families
+                LOG.warning("Setting configured IP versions to [4, 6] since "
+                            "a hostname (rather than IP address) was supplied "
+                            "in 'cephfs_ganesha_server_ip' or "
+                            "in 'cephfs_ganesha_export_ips'.")
+                return [4, 6]
+        return list(self.configured_ip_versions)
+
+
+class NFSProtocolHelper(NFSProtocolHelperMixin, ganesha.GaneshaNASHelper2):

     shared_data = {}
     supported_protocols = ('NFS',)
@@ -980,9 +1065,7 @@ class NFSProtocolHelper(ganesha.GaneshaNASHelper2):
             self.rados_client = kwargs.pop('rados_client')
         if not hasattr(self, 'volname'):
             self.volname = kwargs.pop('volname')
-        self.export_ips = config_object.cephfs_ganesha_export_ips
-        if not self.export_ips:
-            self.export_ips = [self.ganesha_host]
+        self.export_ips = None
         self.configured_ip_versions = set()
         self.config = config_object
@@ -998,30 +1081,6 @@ class NFSProtocolHelper(ganesha.GaneshaNASHelper2):
                         "hostname.") % export_ip)
                 raise exception.InvalidParameterValue(err=msg)

-    def get_export_locations(self, share, subvolume_path):
-        export_locations = []
-        for export_ip in self.export_ips:
-            # Try to escape the export ip. If it fails, means that the
-            # `cephfs_ganesha_server_ip` wasn't possibly set and the used
-            # address is the hostname
-            try:
-                server_address = driver_helpers.escaped_address(export_ip)
-            except ValueError:
-                server_address = export_ip
-
-            export_path = "{server_address}:{mount_path}".format(
-                server_address=server_address, mount_path=subvolume_path)
-
-            LOG.info("Calculated export path for share %(id)s: %(epath)s",
-                     {"id": share['id'], "epath": export_path})
-            export_location = {
-                'path': export_path,
-                'is_admin_only': False,
-                'metadata': {},
-            }
-            export_locations.append(export_location)
-        return export_locations
-
     def _default_config_hook(self):
         """Callback to provide default export block."""
         dconf = super(NFSProtocolHelper, self)._default_config_hook()
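Editor's note: the next hunk introduces NFSClusterProtocolHelper, whose
_get_export_ips() parses the output of "nfs cluster info". For reference,
the JSON shape the code below expects (mirroring the fake payload used by
the new tests further down; hostnames, ports, and addresses are
illustrative) looks like this once decoded:

    # Output of "nfs cluster info", keyed by cluster id. "virtual_ip" is
    # set when the cluster sits behind an ingress service; otherwise it is
    # None and the helper falls back to the per-daemon "backend" addresses.
    nfs_cluster_info = {
        "mycluster": {
            "virtual_ip": None,
            "backend": [
                {"hostname": "ceph-node-1", "ip": "10.0.0.10", "port": "1010"},
                {"hostname": "ceph-node-2", "ip": "10.0.0.11", "port": "1011"},
            ],
        },
    }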
@@ -1070,36 +1129,160 @@ class NFSProtocolHelper(ganesha.GaneshaNASHelper2):
         rados_command(self.rados_client,
                       "fs subvolume deauthorize", argdict)

-    def _get_export_path(self, share):
-        """Callback to provide export path."""
+    def _get_export_ips(self):
+        export_ips = self.config.cephfs_ganesha_export_ips
+        if not export_ips:
+            export_ips = [self.ganesha_host]
+
+        return export_ips
+
+
+class NFSClusterProtocolHelper(NFSProtocolHelperMixin, ganesha.NASHelperBase):
+
+    supported_access_types = ('ip', )
+    supported_access_levels = (constants.ACCESS_LEVEL_RW,
+                               constants.ACCESS_LEVEL_RO)
+
+    def __init__(self, execute, config_object, **kwargs):
+        self.rados_client = kwargs.pop('rados_client')
+        self.volname = kwargs.pop('volname')
+        self.configured_ip_versions = set()
+        self.configuration = config_object
+        self._nfs_clusterid = None
+        self.export_ips = None
+        super(NFSClusterProtocolHelper, self).__init__(execute,
+                                                       config_object,
+                                                       **kwargs)
+
+    @property
+    def nfs_clusterid(self):
+        # ID of the NFS cluster where the driver exports shares
+        if self._nfs_clusterid:
+            return self._nfs_clusterid
+
+        self._nfs_clusterid = (
+            self.configuration.safe_get('cephfs_nfs_cluster_id'))
+
+        if not self._nfs_clusterid:
+            msg = _("The NFS Cluster ID has not been configured. "
+                    "Please check that the cephfs_nfs_cluster_id option "
+                    "has been correctly set in the backend configuration.")
+            raise exception.ShareBackendException(msg=msg)
+
+        return self._nfs_clusterid
+
+    def _get_export_ips(self):
+        """Get NFS cluster export ips."""
+        nfs_clusterid = self.nfs_clusterid
+        export_ips = []
+
         argdict = {
-            "vol_name": self.volname,
-            "sub_name": share["id"]
+            "nfs_cluster_id": nfs_clusterid,
         }
-        if share["share_group_id"] is not None:
-            argdict.update({"group_name": share["share_group_id"]})

-        path = rados_command(
-            self.rados_client, "fs subvolume getpath", argdict)
+        output = rados_command(self.rados_client, "nfs cluster info", argdict)

-        return path
+        nfs_cluster_info = json.loads(output)

-    def _get_export_pseudo_path(self, share):
-        """Callback to provide pseudo path."""
-        return self._get_export_path(share)
+        # if NFS has been deployed with an ingress,
+        # we use the VIP for the export ips
+        vip = nfs_cluster_info[nfs_clusterid]["virtual_ip"]

-    def get_configured_ip_versions(self):
-        if not self.configured_ip_versions:
+        # if there is no VIP, we fall back to the NFS cluster ips
+        if not vip:
+            hosts = nfs_cluster_info[nfs_clusterid]["backend"]
+            for host in hosts:
+                export_ips.append(host["ip"])
+        else:
+            export_ips.append(vip)
+
+        return export_ips
+
+    def check_for_setup_error(self):
+        """Returns an error if prerequisites aren't met."""
+        return
+
+    def _allow_access(self, share, access):
+        """Allow access to the share."""
+        export = {
+            "path": self._get_export_path(share),
+            "nfs_cluster_id": self.nfs_clusterid,
+            "pseudo": self._get_export_pseudo_path(share),
+            "squash": "none",
+            "security_label": True,
+            "protocols": [4],
+            "fsal": {
+                "name": "CEPH",
+                "fs_name": self.volname,
+
+            },
+            "clients": access
+        }
+
+        argdict = {
+            "nfs_cluster_id": self.nfs_clusterid,
+        }
+
+        inbuf = json.dumps(export).encode('utf-8')
+        rados_command(self.rados_client,
+                      "nfs export apply", argdict, inbuf=inbuf)
+
+    def _deny_access(self, share):
+        """Deny access to the share."""
+
+        argdict = {
+            "nfs_cluster_id": self.nfs_clusterid,
+            "pseudo_path": self._get_export_pseudo_path(share)
+        }
+
+        rados_command(self.rados_client, "nfs export rm", argdict)
+
+    def update_access(self, context, share, access_rules, add_rules,
+                      delete_rules, share_server=None):
+        """Update access rules of share.
+
+        Creates an export per share. Modifies access rules of shares by
+        dynamically updating exports via ceph nfs.
+        """
+        rule_state_map = {}
+
+        wanted_rw_clients, wanted_ro_clients = [], []
+        for rule in access_rules:
             try:
-                for export_ip in self.export_ips:
-                    self.configured_ip_versions.add(
-                        ipaddress.ip_address(str(export_ip)).version)
-            except Exception:
-                # export_ips contained a hostname, safest thing is to
-                # claim support for IPv4 and IPv6 address families
-                LOG.warning("Setting configured IP versions to [4, 6] since "
-                            "a hostname (rather than IP address) was supplied "
-                            "in 'cephfs_ganesha_server_ip' or "
-                            "in 'cephfs_ganesha_export_ips'.")
-                return [4, 6]
-        return list(self.configured_ip_versions)
+                ganesha_utils.validate_access_rule(
+                    self.supported_access_types, self.supported_access_levels,
+                    rule, True)
+            except (exception.InvalidShareAccess,
+                    exception.InvalidShareAccessLevel):
+                rule_state_map[rule['id']] = {'state': 'error'}
+                continue
+
+            rule = ganesha_utils.fixup_access_rule(rule)
+            if rule['access_level'] == 'rw':
+                wanted_rw_clients.append(rule['access_to'])
+            elif rule['access_level'] == 'ro':
+                wanted_ro_clients.append(rule['access_to'])
+
+        if access_rules:
+            # add or update export
+            clients = []
+            if wanted_ro_clients:
+                clients.append({
+                    'access_type': 'ro',
+                    'addresses': wanted_ro_clients,
+                    'squash': 'none'
+                })
+            if wanted_rw_clients:
+                clients.append({
+                    'access_type': 'rw',
+                    'addresses': wanted_rw_clients,
+                    'squash': 'none'
+                })
+
+            if clients:  # empty list if no rules passed validation
+                self._allow_access(share, clients)
+        else:
+            # no clients have access to the share; remove the export
+            self._deny_access(share)
+
+        return rule_state_map
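Editor's note: to make the update_access() flow above concrete, consider two
valid rules, rw for 10.0.0.1 and ro for 10.0.0.2 (placeholder addresses).
They collapse into a single export update rather than per-rule calls:

    # "clients" block handed to _allow_access(), which embeds it in the
    # export spec sent to the mgr via "nfs export apply":
    clients = [
        {'access_type': 'ro', 'addresses': ['10.0.0.2'], 'squash': 'none'},
        {'access_type': 'rw', 'addresses': ['10.0.0.1'], 'squash': 'none'},
    ]
    # An empty rule set instead triggers _deny_access(), i.e. the whole
    # export is removed with "nfs export rm".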
diff --git a/manila/share/drivers/ganesha/manager.py b/manila/share/drivers/ganesha/manager.py
index c42ecfadd0..17ef1fe0dd 100644
--- a/manila/share/drivers/ganesha/manager.py
+++ b/manila/share/drivers/ganesha/manager.py
@@ -215,7 +215,7 @@ def setup_rados():
             rados = importutils.import_module('rados')
         except ImportError:
             raise exception.ShareBackendException(
-                _("python-rados is not installed"))
+                _("rados python module is not installed"))


 class GaneshaManager(object):
diff --git a/manila/share/drivers/ganesha/utils.py b/manila/share/drivers/ganesha/utils.py
index bc9eb044f7..08e366e3db 100644
--- a/manila/share/drivers/ganesha/utils.py
+++ b/manila/share/drivers/ganesha/utils.py
@@ -105,7 +105,11 @@ def validate_access_rule(supported_access_types, supported_access_levels,
     errmsg = _("Unsupported access rule of 'type' %(access_type)s, "
                "'level' %(access_level)s, 'to' %(access_to)s: "
                "%(field)s should be one of %(supported)s.")
-    access_param = access_rule.to_dict()
+
+    if not isinstance(access_rule, dict):
+        access_param = access_rule.to_dict()
+    else:
+        access_param = access_rule

     def validate(field, supported_tokens, excinfo):
         if access_rule['access_%s' % field] in supported_tokens:
diff --git a/manila/tests/share/drivers/cephfs/test_driver.py b/manila/tests/share/drivers/cephfs/test_driver.py
index 75279f8e87..c53b113f25 100644
--- a/manila/tests/share/drivers/cephfs/test_driver.py
+++ b/manila/tests/share/drivers/cephfs/test_driver.py
@@ -13,6 +13,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.

+import json
 from unittest import mock

 import ddt
@@ -88,6 +89,7 @@ class CephFSDriverTestCase(test.TestCase):
         self.mock_object(driver, "json_command", MockCephArgparseModule)
         self.mock_object(driver, 'NativeProtocolHelper')
         self.mock_object(driver, 'NFSProtocolHelper')
+        self.mock_object(driver, 'NFSClusterProtocolHelper')

         driver.ceph_default_target = ('mon-mgr', )

@@ -101,10 +103,17 @@ class CephFSDriverTestCase(test.TestCase):
         self.mock_object(share_types, 'get_share_type_extra_specs',
                          mock.Mock(return_value={}))

-    @ddt.data('cephfs', 'nfs')
-    def test_do_setup(self, protocol_helper):
+    @ddt.data(
+        ('cephfs', None),
+        ('nfs', None),
+        ('nfs', 'fs-manila')
+    )
+    @ddt.unpack
+    def test_do_setup(self, protocol_helper, cephfs_nfs_cluster_id):
         self._driver.configuration.cephfs_protocol_helper_type = (
             protocol_helper)
+        self.fake_conf.set_default('cephfs_nfs_cluster_id',
+                                   cephfs_nfs_cluster_id)

         self._driver.do_setup(self._context)

@@ -114,10 +123,16 @@ class CephFSDriverTestCase(test.TestCase):
                 rados_client=self._driver._rados_client,
                 volname=self._driver.volname)
         else:
-            driver.NFSProtocolHelper.assert_called_once_with(
-                self._execute, self._driver.configuration,
-                rados_client=self._driver._rados_client,
-                volname=self._driver.volname)
+            if self.fake_conf.cephfs_nfs_cluster_id is None:
+                driver.NFSProtocolHelper.assert_called_once_with(
+                    self._execute, self._driver.configuration,
+                    rados_client=self._driver._rados_client,
+                    volname=self._driver.volname)
+            else:
+                driver.NFSClusterProtocolHelper.assert_called_once_with(
+                    self._execute, self._driver.configuration,
+                    rados_client=self._driver._rados_client,
+                    volname=self._driver.volname)

         self._driver.protocol_helper.init_helper.assert_called_once_with()

@@ -1219,6 +1234,136 @@ class NFSProtocolHelperTestCase(test.TestCase):
         self.assertEqual('/foo/bar', ret)


+@ddt.ddt
+class NFSClusterProtocolHelperTestCase(test.TestCase):
+
+    def setUp(self):
+        super(NFSClusterProtocolHelperTestCase, self).setUp()
+        self._execute = mock.Mock()
+        self._context = context.get_admin_context()
+        self._share = fake_share.fake_share(share_proto='NFS')
+        self._rados_client = MockRadosModule.Rados()
+        self._volname = "cephfs"
+        self.fake_conf = configuration.Configuration(None)
+
+        self.mock_object(driver.NFSClusterProtocolHelper,
+                         '_get_export_path',
+                         mock.Mock(return_value="ganesha:/foo/bar"))
+        self.mock_object(driver.NFSClusterProtocolHelper,
+                         '_get_export_pseudo_path',
+                         mock.Mock(return_value="ganesha:/foo/bar"))
+        self.mock_object(driver, "rados_command")
+
+        driver.ceph_default_target = ('mon-mgr', )
+
+        self._nfscluster_protocol_helper = driver.NFSClusterProtocolHelper(
+            self._execute,
+            self.fake_conf,
+            rados_client=self._rados_client,
+            volname=self._volname)
+
+        type(self._nfscluster_protocol_helper).nfs_clusterid = (
+            mock.PropertyMock(return_value='fs-manila'))
+
+    @ddt.data(constants.ACCESS_LEVEL_RW, constants.ACCESS_LEVEL_RO)
+    def test_allow_access_rw_ro(self, mode):
+        access_allow_prefix = "nfs export apply"
+        nfs_clusterid = self._nfscluster_protocol_helper.nfs_clusterid
+        volname = self._nfscluster_protocol_helper.volname
+
+        clients = {
+            'access_type': mode,
+            'addresses': ['10.0.0.1'],
+            'squash': 'none'
+        }
+
+        access_allow_dict = {
+            "nfs_cluster_id": nfs_clusterid,
+        }
+
+        export = {
+            "path": "ganesha:/foo/bar",
+            "nfs_cluster_id": nfs_clusterid,
+            "pseudo": "ganesha:/foo/bar",
+            "squash": "none",
+            "security_label": True,
+            "protocols": [4],
+            "fsal": {
+                "name": "CEPH",
+                "fs_name": volname,
+
+            },
+            "clients": clients
+        }
+
+        inbuf = json.dumps(export).encode('utf-8')
+
+        self._nfscluster_protocol_helper._allow_access(self._share, clients)
+
+        driver.rados_command.assert_called_once_with(
+            self._rados_client,
+            access_allow_prefix, access_allow_dict, inbuf=inbuf)
+
+    def test_deny_access(self):
+        access_deny_prefix = "nfs export rm"
+
+        nfs_clusterid = self._nfscluster_protocol_helper.nfs_clusterid
+
+        access_deny_dict = {
+            "nfs_cluster_id": nfs_clusterid,
+            "pseudo_path": "ganesha:/foo/bar"
+        }
+
+        self._nfscluster_protocol_helper._deny_access(self._share)
+
+        driver.rados_command.assert_called_once_with(
+            self._rados_client,
+            access_deny_prefix, access_deny_dict)
+
+    def test_get_export_locations(self):
+        cluster_info_prefix = "nfs cluster info"
+        nfs_clusterid = self._nfscluster_protocol_helper.nfs_clusterid
+
+        cluster_info_dict = {
+            "nfs_cluster_id": nfs_clusterid,
+        }
+
+        cluster_info = {"fs-manila": {
+            "virtual_ip": None,
+            "backend": [
+                {"hostname": "fake-ceph-node-1",
+                 "ip": "10.0.0.10",
+                 "port": "1010"},
+                {"hostname": "fake-ceph-node-2",
+                 "ip": "10.0.0.11",
+                 "port": "1011"}
+            ]
+        }}
+
+        driver.rados_command.return_value = json.dumps(cluster_info)
+
+        fake_cephfs_subvolume_path = "/foo/bar"
+        expected_export_locations = [{
+            'path': '10.0.0.10:/foo/bar',
+            'is_admin_only': False,
+            'metadata': {},
+        }, {
+            'path': '10.0.0.11:/foo/bar',
+            'is_admin_only': False,
+            'metadata': {},
+        }]
+
+        export_locations = (
+            self._nfscluster_protocol_helper.get_export_locations(
+                self._share, fake_cephfs_subvolume_path))
+
+        driver.rados_command.assert_called_once_with(
+            self._rados_client,
+            cluster_info_prefix, cluster_info_dict)
+
+        self.assertEqual(expected_export_locations, export_locations)
+
+
 @ddt.ddt
 class CephFSDriverAltConfigTestCase(test.TestCase):
     """Test the CephFS driver with non-default config values."""
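Editor's note: the fake payload in test_get_export_locations pins the exact
"nfs cluster info" contract that _get_export_ips() relies on (JSON text
output with a "virtual_ip" key and a "backend" list). Assuming a standard
OpenStack development environment, the new tests can be run in isolation
with, for example, `stestr run manila.tests.share.drivers.cephfs.test_driver.NFSClusterProtocolHelperTestCase`.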
diff --git a/releasenotes/notes/use-cephadm-nfs-ganesha-b9e071924de738fd.yaml b/releasenotes/notes/use-cephadm-nfs-ganesha-b9e071924de738fd.yaml
new file mode 100644
index 0000000000..76493e48e0
--- /dev/null
+++ b/releasenotes/notes/use-cephadm-nfs-ganesha-b9e071924de738fd.yaml
@@ -0,0 +1,22 @@
+---
+features:
+  - |
+    NFSClusterProtocolHelper has been added to allow users to export CephFS
+    shares over clustered NFS gateways deployed with cephadm. This presents
+    many advantages, since the operator no longer needs to maintain their
+    own instances of NFS Ganesha apart from the Ceph cluster. For this, we
+    now communicate with the ceph mgr using the nfs plugin. Read more about
+    this plugin at https://docs.ceph.com/en/latest/cephfs/nfs/
+upgrade:
+  - |
+    The CephFS driver now supports a new configuration option:
+
+    * cephfs_nfs_cluster_id (string option): the ID of the NFS cluster to
+      use, i.e. which cephadm-deployed NFS cluster the driver should export
+      shares on.
+other:
+  - |
+    Since the CephFS driver is now capable of using ceph manager commands
+    to manage NFS exports, we would like to deprecate and remove support
+    for managing exports with the help of DBUS in a future release. Please
+    use cephadm-deployed NFS Ganesha clusters in greenfield deployments
+    with OpenStack Manila and refrain from using a standalone,
+    non-clustered nfs-ganesha service with this driver. As this solution
+    is hardened for HA within Ceph, we expect to provide code to help
+    migrate existing nfs-ganesha exports to NFS Ganesha clusters in a
+    future release.
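Editor's note: a minimal backend stanza wiring the new option together might
look as follows. The backend name, auth id, filesystem name, and cluster id
are placeholders, and every option other than cephfs_nfs_cluster_id predates
this patch:

    [cephfsnfs]
    share_backend_name = CEPHFSNFS
    share_driver = manila.share.drivers.cephfs.driver.CephFSDriver
    driver_handles_share_servers = False
    cephfs_protocol_helper_type = NFS
    cephfs_conf_path = /etc/ceph/ceph.conf
    cephfs_auth_id = manila
    cephfs_filesystem_name = cephfs
    cephfs_nfs_cluster_id = mycluster

The NFS cluster itself is created on the Ceph side (for example, with
`ceph nfs cluster create mycluster`, optionally behind an ingress service)
and can be inspected with `ceph nfs cluster info mycluster`; see the Ceph
documentation linked above for the exact syntax on your Ceph release.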