diff --git a/etc/ovf-metadata.json.sample b/etc/ovf-metadata.json.sample new file mode 100644 index 00000000..38628219 --- /dev/null +++ b/etc/ovf-metadata.json.sample @@ -0,0 +1,8 @@ +{ + "cim_pasd": [ + "ProcessorArchitecture", + "InstructionSet", + "InstructionSetExtensionName" + ] +} + diff --git a/glance/async/flows/base_import.py b/glance/async/flows/base_import.py index 3d8aea8e..8d17ca96 100644 --- a/glance/async/flows/base_import.py +++ b/glance/async/flows/base_import.py @@ -384,7 +384,8 @@ def _get_import_flows(**kwargs): # Future patches will keep using NamedExtensionManager but they'll # rely on a config option to control this process. extensions = named.NamedExtensionManager('glance.flows.import', - names=['convert', + names=['ovf_process', + 'convert', 'introspect'], name_order=True, invoke_on_load=True, diff --git a/glance/async/flows/ovf_process.py b/glance/async/flows/ovf_process.py new file mode 100644 index 00000000..e77498b4 --- /dev/null +++ b/glance/async/flows/ovf_process.py @@ -0,0 +1,269 @@ +# Copyright 2015 Intel Corporation +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import logging +import os +import re +import shutil +import tarfile + +try: + import xml.etree.cElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET + +from oslo_config import cfg +from oslo_serialization import jsonutils as json +from six.moves import urllib +from taskflow.patterns import linear_flow as lf +from taskflow import task + +from glance import i18n + + +_ = i18n._ +_LE = i18n._LE +_LW = i18n._LW +LOG = logging.getLogger(__name__) + +CONF = cfg.CONF +# Define the CIM namespaces here. Currently we will be supporting extracting +# properties only from CIM_ProcessorAllocationSettingData +CIM_NS = {'http://schemas.dmtf.org/wbem/wscim/1/cim-schema/2/' + 'CIM_ProcessorAllocationSettingData': 'cim_pasd'} + + +class _OVF_Process(task.Task): + """ + Extracts the single disk image from an OVA tarball and saves it to the + Glance image store. It also parses the included OVF file for selected + metadata which it then saves in the image store as the previously saved + image's properties. + """ + + default_provides = 'file_path' + + def __init__(self, task_id, task_type, image_repo): + self.task_id = task_id + self.task_type = task_type + self.image_repo = image_repo + super(_OVF_Process, self).__init__( + name='%s-OVF_Process-%s' % (task_type, task_id)) + + def _get_extracted_file_path(self, image_id): + return os.path.join(CONF.task.work_dir, + "%s.extracted" % image_id) + + def _get_ova_iter_objects(self, uri): + """Returns iterable object either for local file or uri + :param uri: uri (remote or local) to the ova package we want to iterate + """ + + if uri.startswith("file://"): + uri = uri.split("file://")[-1] + return open(uri, "rb") + + return urllib.request.urlopen(uri) + + def execute(self, image_id, file_path): + """ + :param image_id: Id to use when storing extracted image to Glance + image store. It is assumed that some other task has already + created a row in the store with this id. + :param file_path: Path to the OVA package + """ + + image = self.image_repo.get(image_id) + # Expect 'ova' as image container format for OVF_Process task + if image.container_format == 'ova': + # FIXME(dramakri): This is an admin-only feature for security + # reasons. Ideally this should be achieved by making the import + # task API admin only. This is one of the items that the upcoming + # import refactoring work plans to do. Until then, we will check + # the context as a short-cut. + if image.context and image.context.is_admin: + extractor = OVAImageExtractor() + data_iter = self._get_ova_iter_objects(file_path) + disk, properties = extractor.extract(data_iter) + image.extra_properties.update(properties) + image.container_format = 'bare' + self.image_repo.save(image) + dest_path = self._get_extracted_file_path(image_id) + with open(dest_path, 'wb') as f: + shutil.copyfileobj(disk, f, 4096) + + # Overwrite the input ova file since it is no longer needed + os.rename(dest_path, file_path.split("file://")[-1]) + + else: + raise RuntimeError(_('OVA extract is limited to admin')) + + return file_path + + def revert(self, image_id, result, **kwargs): + fs_path = self._get_extracted_file_path(image_id) + if os.path.exists(fs_path): + os.path.remove(fs_path) + + +class OVAImageExtractor(object): + """Extracts and parses the uploaded OVA package + + A class that extracts the disk image and OVF file from an OVA + tar archive. Parses the OVF file for metadata of interest. + """ + + def __init__(self): + self.interested_properties = [] + self._load_interested_properties() + + def extract(self, ova): + """Extracts disk image and OVF file from OVA package + + Extracts a single disk image and OVF from OVA tar archive and calls + OVF parser method. + :param ova: a file object containing the OVA file + :returns: a tuple of extracted disk file object and dictionary of + properties parsed from the OVF file + :raises: RuntimeError for malformed OVA and OVF files + """ + with tarfile.open(fileobj=ova) as tar_file: + filenames = tar_file.getnames() + ovf_filename = next((filename for filename in filenames + if filename.endswith('.ovf')), None) + if ovf_filename: + ovf = tar_file.extractfile(ovf_filename) + disk_name, properties = self._parse_OVF(ovf) + ovf.close() + else: + raise RuntimeError(_('Could not find OVF file in OVA archive ' + 'file.')) + + disk = tar_file.extractfile(disk_name) + + return (disk, properties) + + def _parse_OVF(self, ovf): + """Parses the OVF file + + Parses the OVF file for specified metadata properties. Interested + properties must be specfied in ovf-metadata.json conf file. + + The OVF file's qualified namespaces are removed from the included + properties. + :param ovf: a file object containing the OVF file + :returns: a tuple of disk filename and a properties dictionary + :raises: RuntimeError for malformed OVF file + """ + + def _get_namespace_and_tag(tag): + """Separate and return the namespace and tag elements. + + There is no native support for this operation in elementtree + package. See http://bugs.python.org/issue18304 for details. + """ + m = re.match(r'\{(.+)\}(.+)', tag) + if m: + return m.group(1), m.group(2) + else: + return '', tag + + disk_filename, file_elements, file_ref = None, None, None + properties = {} + for event, elem in ET.iterparse(ovf): + if event == 'end': + ns, tag = _get_namespace_and_tag(elem.tag) + if ns in CIM_NS and tag in self.interested_properties: + properties[CIM_NS[ns] + '_' + tag] = (elem.text.strip() + if elem.text else '') + + if tag == 'DiskSection': + disks = [child for child in list(elem) + if _get_namespace_and_tag(child.tag)[1] == + 'Disk'] + if len(disks) > 1: + """ + Currently only single disk image extraction is + supported. + FIXME(dramakri): Support multiple images in OVA package + """ + raise RuntimeError(_('Currently, OVA packages ' + 'containing multiple disk are ' + 'not supported.')) + disk = next(iter(disks)) + file_ref = next(value for key, value in disk.items() if + _get_namespace_and_tag(key)[1] == + 'fileRef') + + if tag == 'References': + file_elements = list(elem) + + # Clears elements to save memory except for 'File' and 'Disk' + # references, which we will need to later access + if tag != 'File' and tag != 'Disk': + elem.clear() + + for file_element in file_elements: + file_id = next(value for key, value in file_element.items() + if _get_namespace_and_tag(key)[1] == 'id') + if file_id != file_ref: + continue + disk_filename = next(value for key, value in file_element.items() + if _get_namespace_and_tag(key)[1] == 'href') + + return (disk_filename, properties) + + def _load_interested_properties(self): + """Find the OVF properties config file and load it. + + OVF properties config file specifies which metadata of interest to + extract. Reads in a JSON file named 'ovf-metadata.json' if available. + See example file at etc/ovf-metadata.json.sample. + """ + filename = 'ovf-metadata.json' + match = CONF.find_file(filename) + if match: + with open(match, 'r') as properties_file: + properties = json.loads(properties_file.read()) + self.interested_properties = properties.get( + 'cim_pasd', []) + if not self.interested_properties: + LOG.warn(_('OVF metadata of interest was not specified ' + 'in ovf-metadata.json config file. Please set ' + '"cim_pasd" to a list of interested ' + 'CIM_ProcessorAllocationSettingData ' + 'properties.')) + else: + LOG.warn(_('OVF properties config file "ovf-metadata.json" was ' + 'not found.')) + + +def get_flow(**kwargs): + """Returns task flow for OVF Process. + + :param task_id: Task ID + :param task_type: Type of the task. + :param image_repo: Image repository used. + """ + task_id = kwargs.get('task_id') + task_type = kwargs.get('task_type') + image_repo = kwargs.get('image_repo') + + LOG.debug("Flow: %(task_type)s with ID %(id)s on %(repo)s" % + {'task_type': task_type, 'id': task_id, 'repo': image_repo}) + + return lf.Flow(task_type).add( + _OVF_Process(task_id, task_type, image_repo), + ) diff --git a/glance/tests/unit/async/flows/test_ovf_process.py b/glance/tests/unit/async/flows/test_ovf_process.py new file mode 100644 index 00000000..e575da91 --- /dev/null +++ b/glance/tests/unit/async/flows/test_ovf_process.py @@ -0,0 +1,169 @@ +# Copyright 2015 Intel Corporation +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os.path +import shutil +import tarfile +import tempfile + +import mock +try: + from xml.etree.cElementTree import ParseError +except ImportError: + from xml.etree.ElementTree import ParseError + +from glance.async.flows import ovf_process +import glance.tests.utils as test_utils +from oslo_config import cfg + + +class TestOvfProcessTask(test_utils.BaseTestCase): + + def setUp(self): + super(TestOvfProcessTask, self).setUp() + # The glance/tests/var dir containing sample ova packages used + # by the tests in this class + self.test_ova_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), + '../../../', 'var')) + self.tempdir = tempfile.mkdtemp() + self.config(work_dir=self.tempdir, group="task") + + # These are the properties that we will extract from the ovf + # file contained in a ova package + interested_properties = ( + '{\n' + ' "cim_pasd": [\n' + ' "InstructionSetExtensionName",\n' + ' "ProcessorArchitecture"]\n' + '}\n') + self.config_file_name = os.path.join(self.tempdir, 'ovf-metadata.json') + with open(self.config_file_name, 'w') as config_file: + config_file.write(interested_properties) + + self.image = mock.Mock() + self.image.container_format = 'ova' + self.image.context.is_admin = True + + self.img_repo = mock.Mock() + self.img_repo.get.return_value = self.image + + def tearDown(self): + if os.path.exists(self.tempdir): + shutil.rmtree(self.tempdir) + + super(TestOvfProcessTask, self).tearDown() + + def _copy_ova_to_tmpdir(self, ova_name): + # Copies an ova pacakge to the tempdir for tempdir from where + # the system-under-test will read it from + shutil.copy(os.path.join(self.test_ova_dir, ova_name), self.tempdir) + return os.path.join(self.tempdir, ova_name) + + @mock.patch.object(cfg.ConfigOpts, 'find_file') + def test_ovf_process_success(self, mock_find_file): + mock_find_file.return_value = self.config_file_name + + ova_file_path = self._copy_ova_to_tmpdir('testserver.ova') + ova_uri = 'file://' + ova_file_path + + oprocess = ovf_process._OVF_Process('task_id', 'ovf_proc', + self.img_repo) + self.assertEqual(ova_uri, oprocess.execute('test_image_id', ova_uri)) + + # Note that the extracted disk image is overwritten onto the input ova + # file + with open(ova_file_path, 'rb') as disk_image_file: + content = disk_image_file.read() + # b'ABCD' is the exact contents of the disk image file + # testserver-disk1.vmdk contained in the testserver.ova package used + # by this test + self.assertEqual(b'ABCD', content) + # 'DMTF:x86:VT-d' is the value in the testerver.ovf file in the + # testserver.ova package + self.image.extra_properties.update.assert_called_once_with( + {'cim_pasd_InstructionSetExtensionName': 'DMTF:x86:VT-d'}) + self.assertEqual('bare', self.image.container_format) + + @mock.patch.object(cfg.ConfigOpts, 'find_file') + def test_ovf_process_no_config_file(self, mock_find_file): + # Mimics a Glance deployment without the ovf-metadata.json file + mock_find_file.return_value = None + + ova_file_path = self._copy_ova_to_tmpdir('testserver.ova') + ova_uri = 'file://' + ova_file_path + + oprocess = ovf_process._OVF_Process('task_id', 'ovf_proc', + self.img_repo) + self.assertEqual(ova_uri, oprocess.execute('test_image_id', ova_uri)) + + # Note that the extracted disk image is overwritten onto the input + # ova file. + with open(ova_file_path, 'rb') as disk_image_file: + content = disk_image_file.read() + # b'ABCD' is the exact contents of the disk image file + # testserver-disk1.vmdk contained in the testserver.ova package used + # by this test + self.assertEqual(b'ABCD', content) + # No properties must be selected from the ovf file + self.image.extra_properties.update.assert_called_once_with({}) + self.assertEqual('bare', self.image.container_format) + + @mock.patch.object(cfg.ConfigOpts, 'find_file') + def test_ovf_process_not_admin(self, mock_find_file): + mock_find_file.return_value = self.config_file_name + + ova_file_path = self._copy_ova_to_tmpdir('testserver.ova') + ova_uri = 'file://' + ova_file_path + + self.image.context.is_admin = False + + oprocess = ovf_process._OVF_Process('task_id', 'ovf_proc', + self.img_repo) + self.assertRaises(RuntimeError, oprocess.execute, 'test_image_id', + ova_uri) + + def test_extract_ova_not_tar(self): + # testserver-not-tar.ova package is not in tar format + ova_file_path = os.path.join(self.test_ova_dir, + 'testserver-not-tar.ova') + iextractor = ovf_process.OVAImageExtractor() + with open(ova_file_path, 'rb') as ova_file: + self.assertRaises(tarfile.ReadError, iextractor.extract, ova_file) + + def test_extract_ova_no_disk(self): + # testserver-no-disk.ova package contains no disk image file + ova_file_path = os.path.join(self.test_ova_dir, + 'testserver-no-disk.ova') + iextractor = ovf_process.OVAImageExtractor() + with open(ova_file_path, 'rb') as ova_file: + self.assertRaises(KeyError, iextractor.extract, ova_file) + + def test_extract_ova_no_ovf(self): + # testserver-no-ovf.ova package contains no ovf file + ova_file_path = os.path.join(self.test_ova_dir, + 'testserver-no-ovf.ova') + iextractor = ovf_process.OVAImageExtractor() + with open(ova_file_path, 'rb') as ova_file: + self.assertRaises(RuntimeError, iextractor.extract, ova_file) + + def test_extract_ova_bad_ovf(self): + # testserver-bad-ovf.ova package has an ovf file that contains + # invalid xml + ova_file_path = os.path.join(self.test_ova_dir, + 'testserver-bad-ovf.ova') + iextractor = ovf_process.OVAImageExtractor() + with open(ova_file_path, 'rb') as ova_file: + self.assertRaises(ParseError, iextractor._parse_OVF, ova_file) diff --git a/glance/tests/var/testserver-bad-ovf.ova b/glance/tests/var/testserver-bad-ovf.ova new file mode 100644 index 00000000..bb89a2ea Binary files /dev/null and b/glance/tests/var/testserver-bad-ovf.ova differ diff --git a/glance/tests/var/testserver-no-disk.ova b/glance/tests/var/testserver-no-disk.ova new file mode 100644 index 00000000..5d291645 Binary files /dev/null and b/glance/tests/var/testserver-no-disk.ova differ diff --git a/glance/tests/var/testserver-no-ovf.ova b/glance/tests/var/testserver-no-ovf.ova new file mode 100644 index 00000000..e251d16d Binary files /dev/null and b/glance/tests/var/testserver-no-ovf.ova differ diff --git a/glance/tests/var/testserver-not-tar.ova b/glance/tests/var/testserver-not-tar.ova new file mode 100644 index 00000000..6c07869a Binary files /dev/null and b/glance/tests/var/testserver-not-tar.ova differ diff --git a/glance/tests/var/testserver.ova b/glance/tests/var/testserver.ova new file mode 100644 index 00000000..f593d29f Binary files /dev/null and b/glance/tests/var/testserver.ova differ diff --git a/setup.cfg b/setup.cfg index a7a101ff..91426106 100644 --- a/setup.cfg +++ b/setup.cfg @@ -56,6 +56,7 @@ glance.flows = glance.flows.import = convert = glance.async.flows.convert:get_flow introspect = glance.async.flows.introspect:get_flow + ovf_process = glance.async.flows.ovf_process:get_flow [build_sphinx] all_files = 1