From fbc07d3ad99dea6da4880325662ac52fc7f64f66 Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Wed, 21 Oct 2015 15:46:27 +0530 Subject: [PATCH] Refactor read_metadata() method This change: * Simplifies read_metadata() method. * Validates pickle header before attempting to unpickle. Change-Id: I08d4187f7f4cc963d095b2cd2bcee3039a7dc858 Signed-off-by: Prashanth Pai --- swiftonfile/swift/common/utils.py | 92 ++++++++++++++++--------------- test/unit/common/test_utils.py | 29 +++++----- 2 files changed, 62 insertions(+), 59 deletions(-) diff --git a/swiftonfile/swift/common/utils.py b/swiftonfile/swift/common/utils.py index 8e53505..e283a23 100644 --- a/swiftonfile/swift/common/utils.py +++ b/swiftonfile/swift/common/utils.py @@ -63,68 +63,72 @@ def normalize_timestamp(timestamp): return "%016.05f" % (float(timestamp)) +def serialize_metadata(metadata): + return pickle.dumps(metadata, PICKLE_PROTOCOL) + + +def deserialize_metadata(metastr): + """ + Returns dict populated with metadata if deserializing is successful. + Returns empty dict if deserializing fails. + """ + if metastr.startswith('\x80\x02}') and metastr.endswith('.'): + # Assert that the metadata was indeed pickled before attempting + # to unpickle. This only *reduces* risk of malicious shell code + # being executed. However, it does NOT fix anything. + try: + return pickle.loads(metastr) + except (pickle.UnpicklingError, EOFError, AttributeError, + IndexError, ImportError, AssertionError): + return {} + else: + return {} + + def read_metadata(path_or_fd): """ - Helper function to read the pickled metadata from a File/Directory. + Helper function to read the serialized metadata from a File/Directory. :param path_or_fd: File/Directory path or fd from which to read metadata.
:returns: dictionary of metadata """ - metadata = None - metadata_s = '' + metastr = '' key = 0 - while metadata is None: - try: - metadata_s += do_getxattr(path_or_fd, - '%s%s' % (METADATA_KEY, (key or ''))) - except IOError as err: - if err.errno == errno.ENODATA: - if key > 0: - # No errors reading the xattr keys, but since we have not - # been able to find enough chunks to get a successful - # unpickle operation, we consider the metadata lost, and - # drop the existing data so that the internal state can be - # recreated. - clean_metadata(path_or_fd) - # We either could not find any metadata key, or we could find - # some keys, but were not successful in performing the - # unpickling (missing keys perhaps)? Either way, just report - # to the caller we have no metadata. - metadata = {} - else: - # Note that we don't touch the keys on errors fetching the - # data since it could be a transient state. - raise SwiftOnFileSystemIOError( - err.errno, '%s, getxattr("%s", %s)' % (err.strerror, - path_or_fd, key)) - else: - try: - # If this key provides all or the remaining part of the pickle - # data, we don't need to keep searching for more keys. This - # means if we only need to store data in N xattr key/value - # pair, we only need to invoke xattr get N times. With large - # keys sizes we are shooting for N = 1. - metadata = pickle.loads(metadata_s) - assert isinstance(metadata, dict) - except (EOFError, pickle.UnpicklingError): - # We still are not able recognize this existing data collected - # as a pickled object. Make sure we loop around to try to get - # more from another xattr key. 
- metadata = None - key += 1 + try: + while True: + metastr += do_getxattr(path_or_fd, '%s%s' % + (METADATA_KEY, (key or ''))) + key += 1 + if len(metastr) < MAX_XATTR_SIZE: + # Prevent further getxattr calls + break + except IOError as err: + if err.errno != errno.ENODATA: + raise + + if not metastr: + return {} + + metadata = deserialize_metadata(metastr) + if not metadata: + # Empty dict, i.e. deserializing of metadata has failed, probably + # because it is invalid, incomplete or corrupt + clean_metadata(path_or_fd) + + assert isinstance(metadata, dict) return metadata def write_metadata(path_or_fd, metadata): """ - Helper function to write pickled metadata for a File/Directory. + Helper function to write serialized metadata for a File/Directory. :param path_or_fd: File/Directory path or fd to write the metadata :param metadata: dictionary of metadata write """ assert isinstance(metadata, dict) - metastr = pickle.dumps(metadata, PICKLE_PROTOCOL) + metastr = serialize_metadata(metadata) key = 0 while metastr: try: diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index b81d5d3..6a9d5e7 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -22,11 +22,11 @@ import xattr import cPickle as pickle import tempfile import hashlib -import tarfile import shutil from collections import defaultdict from mock import patch, Mock from swiftonfile.swift.common import utils +from swiftonfile.swift.common.utils import deserialize_metadata, serialize_metadata from swiftonfile.swift.common.exceptions import SwiftOnFileSystemOSError, \ SwiftOnFileSystemIOError from swift.common.exceptions import DiskFileNoSpace @@ -154,7 +154,7 @@ class TestUtils(unittest.TestCase): xkey = _xkey(path, utils.METADATA_KEY) assert len(_xattrs.keys()) == 1 assert xkey in _xattrs - assert orig_d == pickle.loads(_xattrs[xkey]) + assert orig_d == deserialize_metadata(_xattrs[xkey]) assert _xattr_op_cnt['set'] == 1 def test_write_metadata_err(self):
@@ -205,13 +205,13 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert len(_xattrs[xkey]) <= utils.MAX_XATTR_SIZE payload += _xattrs[xkey] - assert orig_d == pickle.loads(payload) + assert orig_d == deserialize_metadata(payload) assert _xattr_op_cnt['set'] == 3, "%r" % _xattr_op_cnt def test_clean_metadata(self): path = "/tmp/foo/c" expected_d = {'a': 'y' * 150000} - expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + expected_p = serialize_metadata(expected_d) for i in range(0, 3): xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] @@ -223,7 +223,7 @@ class TestUtils(unittest.TestCase): def test_clean_metadata_err(self): path = "/tmp/foo/c" xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps({'a': 'y'}, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata({'a': 'y'}) _xattr_rem_err[xkey] = errno.EOPNOTSUPP try: utils.clean_metadata(path) @@ -237,7 +237,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/r" expected_d = {'a': 'y'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(expected_d) res_d = utils.read_metadata(path) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt @@ -252,7 +252,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/r" expected_d = {'a': 'y'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(expected_d) _xattr_get_err[xkey] = errno.EOPNOTSUPP try: utils.read_metadata(path) @@ -265,7 +265,7 @@ class TestUtils(unittest.TestCase): def test_read_metadata_multiple(self): path = "/tmp/foo/r" expected_d = {'a': 'y' * 150000} - expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + expected_p = serialize_metadata(expected_d) for i in range(0, 3): xkey = 
_xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] @@ -273,12 +273,12 @@ class TestUtils(unittest.TestCase): assert not expected_p res_d = utils.read_metadata(path) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) - assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt + assert _xattr_op_cnt['get'] == 4, "%r" % _xattr_op_cnt def test_read_metadata_multiple_one_missing(self): path = "/tmp/foo/r" expected_d = {'a': 'y' * 150000} - expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) + expected_p = serialize_metadata(expected_d) for i in range(0, 2): xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] @@ -287,7 +287,6 @@ class TestUtils(unittest.TestCase): res_d = utils.read_metadata(path) assert res_d == {} assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt - assert len(_xattrs.keys()) == 0, "Expected 0 keys, found %d" % len(_xattrs.keys()) def test_restore_metadata_none(self): # No initial metadata @@ -303,7 +302,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/i" initial_d = {'a': 'z'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(initial_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(initial_d) res_d = utils.restore_metadata(path, {'b': 'y'}) expected_d = {'a': 'z', 'b': 'y'} assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) @@ -315,7 +314,7 @@ class TestUtils(unittest.TestCase): path = "/tmp/foo/i" initial_d = {'a': 'z'} xkey = _xkey(path, utils.METADATA_KEY) - _xattrs[xkey] = pickle.dumps(initial_d, utils.PICKLE_PROTOCOL) + _xattrs[xkey] = serialize_metadata(initial_d) res_d = utils.restore_metadata(path, {}) expected_d = {'a': 'z'} assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) @@ -398,7 +397,7 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['set'] 
== 1 - md = pickle.loads(_xattrs[xkey]) + md = deserialize_metadata(_xattrs[xkey]) assert r_md == md for key in self.obj_keys: @@ -420,7 +419,7 @@ class TestUtils(unittest.TestCase): assert xkey in _xattrs assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['set'] == 1 - md = pickle.loads(_xattrs[xkey]) + md = deserialize_metadata(_xattrs[xkey]) assert r_md == md for key in self.obj_keys: