# -*- coding:utf-8 -*-
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for swift.obj.diskfile"""

import six.moves.cPickle as pickle
import os
import errno
import itertools
from unittest.util import safe_repr
import mock
import unittest
import email
import tempfile
import uuid
import xattr
import re
from collections import defaultdict
from random import shuffle, randint
from shutil import rmtree
from time import time
from tempfile import mkdtemp
from hashlib import md5
from contextlib import closing, contextmanager
from gzip import GzipFile

from eventlet import hubs, timeout, tpool
from swift.obj.diskfile import MD5_OF_EMPTY_STRING
from test.unit import (FakeLogger, mock as unit_mock, temptree,
                       patch_policies, debug_logger, EMPTY_ETAG,
                       make_timestamp_iter, DEFAULT_TEST_EC_TYPE)

from nose import SkipTest
from swift.obj import diskfile
from swift.common import utils
from swift.common.utils import hash_path, mkdirs, Timestamp, encode_timestamps
from swift.common import ring
from swift.common.splice import splice
from swift.common.exceptions import DiskFileNotExist, DiskFileQuarantined, \
    DiskFileDeviceUnavailable, DiskFileDeleted, DiskFileNotOpen, \
    DiskFileError, ReplicationLockTimeout, DiskFileCollision, \
    DiskFileExpired, SwiftException, DiskFileNoSpace, DiskFileXattrNotSupported
from swift.common.storage_policy import (
    POLICIES, get_policy_string, StoragePolicy, ECStoragePolicy,
    BaseStoragePolicy, REPL_POLICY, EC_POLICY)


# Storage policies defined for this test module: index 0 is a StoragePolicy
# named 'zero' and flagged as the default, index 1 is an ECStoragePolicy
# named 'one' configured with ec_ndata=10 and ec_nparity=4.
# NOTE(review): not referenced in the visible part of this file; presumably
# handed to patch_policies by test classes further down - confirm.
test_policies = [
    StoragePolicy(0, name='zero', is_default=True),
    ECStoragePolicy(1, name='one', is_default=False,
                    ec_type=DEFAULT_TEST_EC_TYPE,
                    ec_ndata=10, ec_nparity=4),
]


def find_paths_with_matching_suffixes(needed_matches=2, needed_suffixes=3):
    """
    Generate random object paths until enough of them share a hash suffix.

    Random object names are created under account 'a' and container 'c' and
    grouped by the last three characters of their ``hash_path``. Generation
    stops once at least ``needed_suffixes`` distinct suffixes have been seen
    and the suffix of the most recent path has collected at least
    ``needed_matches`` paths.

    :param needed_matches: minimum number of paths sharing the final suffix
    :param needed_suffixes: minimum number of distinct suffixes overall
    :returns: a tuple of (dict mapping suffix to list of path tuples,
              the suffix that reached ``needed_matches``)
    """
    paths = defaultdict(list)
    satisfied = False
    while not satisfied:
        path = ('a', 'c', uuid.uuid4().hex)
        suffix = hash_path(*path)[-3:]
        paths[suffix].append(path)
        # in the extremely unlikely situation where you land the matches
        # you need before you get the total suffixes you need - it's
        # simpler to just ignore this suffix for now and keep generating
        satisfied = (len(paths) >= needed_suffixes and
                     len(paths[suffix]) >= needed_matches)
    return paths, suffix


def _create_test_ring(path, policy):
    """
    Write a small pickled, gzipped ring file for ``policy`` and load it.

    The file is created in ``path`` and named after the policy's 'object'
    ring (``get_policy_string('object', policy)`` plus ``.ring.gz``). It
    describes seven 'sda1' devices on port 6200 (five IPv4 and two IPv6
    addresses), three replica rows of seven entries and a part shift of 30.

    :param path: directory in which the ring file is written
    :param policy: storage policy the ring is built for
    :returns: a ``ring.Ring`` loaded from ``path`` with a reload time of 15
    """
    ring_name = get_policy_string('object', policy)
    ring_file = os.path.join(path, ring_name + '.ring.gz')
    replica2part2dev_id = [
        [0, 1, 2, 3, 4, 5, 6],
        [1, 2, 3, 0, 5, 6, 4],
        [2, 3, 0, 1, 6, 4, 5]]
    # (zone, ip) per device; the device id is the position in this table
    zones_and_ips = (
        (0, '127.0.0.0'),
        (1, '127.0.0.1'),
        (2, '127.0.0.2'),
        (4, '127.0.0.3'),
        (5, '127.0.0.4'),
        (6, 'fe80::202:b3ff:fe1e:8329'),
        (7, '2001:0db8:85a3:0000:0000:8a2e:0370:7334'),
    )
    devs = [
        {'id': dev_id, 'device': 'sda1', 'zone': zone, 'ip': ip,
         'port': 6200}
        for dev_id, (zone, ip) in enumerate(zones_and_ips)]
    ring_data = ring.RingData(replica2part2dev_id, devs, 30)
    with closing(GzipFile(ring_file, 'wb')) as gz_file:
        pickle.dump(ring_data, gz_file)
    return ring.Ring(path, ring_name=ring_name, reload_time=15)


@patch_policies
class TestDiskFileModuleMethods(unittest.TestCase):
    """Tests for module-level helper functions of swift.obj.diskfile."""

    def setUp(self):
        """Create a device tree, a test ring and a DiskFileManager."""
        utils.HASH_PATH_SUFFIX = 'endcap'
        utils.HASH_PATH_PREFIX = ''
        # Setup a test ring per policy (stolen from common/test_ring.py)
        self.testdir = tempfile.mkdtemp()
        self.devices = os.path.join(self.testdir, 'node')
        self.existing_device = 'sda1'
        device_path = os.path.join(self.devices, self.existing_device)
        self.objects = os.path.join(device_path, 'objects')
        # wipe the freshly made temp dir and rebuild the tree level by level
        rmtree(self.testdir, ignore_errors=1)
        for new_dir in (self.testdir, self.devices, device_path,
                        self.objects):
            os.mkdir(new_dir)
        self.parts = {}
        for part in ('0', '1', '2', '3'):
            part_path = os.path.join(self.objects, part)
            self.parts[part] = part_path
            os.mkdir(part_path)
        self.ring = _create_test_ring(self.testdir, POLICIES.legacy)
        self.conf = {
            'swift_dir': self.testdir,
            'devices': self.devices,
            'mount_check': 'false',
            'timeout': '300',
            'stats_interval': '1',
        }
        self.df_mgr = diskfile.DiskFileManager(self.conf, FakeLogger())

    def tearDown(self):
        """Remove the temporary directory tree created by setUp."""
        rmtree(self.testdir, ignore_errors=1)

    def _create_diskfile(self, policy):
        """Return a diskfile for object a/c/o in partition '0'."""
        obj_args = (self.existing_device, '0', 'a', 'c', 'o')
        return self.df_mgr.get_diskfile(*obj_args, policy=policy)

    def test_extract_policy(self):
        """extract_policy returns the path's policy, or None if it can't."""
        cases = [
            # good path names
            ('objects/0/606/1984527ed7ef6247c78606/1401379842.14643.data',
             POLICIES[0]),
            ('objects-1/0/606/198452b6ef6247c78606/1401379842.14643.data',
             POLICIES[1]),
            # leading slash
            ('/objects/0/606/1984527ed7ef6247c78606/1401379842.14643.data',
             POLICIES[0]),
            ('/objects-1/0/606/198452b6ef6247c78606/1401379842.14643.data',
             POLICIES[1]),
            # full paths
            ('/srv/node/sda1/objects-1/1/abc/def/1234.data', POLICIES[1]),
            ('/srv/node/sda1/objects/1/abc/def/1234.data', POLICIES[0]),
            # short paths
            ('/srv/node/sda1/objects/1/1234.data', POLICIES[0]),
            ('/srv/node/sda1/objects-1/1/1234.data', POLICIES[1]),
            # well formatted but, unknown policy index
            ('objects-2/0/606/198427efcff042c78606/1401379842.14643.data',
             None),
            # malformed path
            ('', None),
            ('/srv/node/sda1/objects-t/1/abc/def/1234.data', None),
            ('XXXX/0/606/1984527ed42b6ef6247c78606/1401379842.14643.data',
             None),
            ('/srv/node/sda1/foo-1/1/abc/def/1234.data', None),
            ('/srv/node/sda1/obj1/1/abc/def/1234.data', None),
        ]
        for path, expected_policy in cases:
            self.assertEqual(diskfile.extract_policy(path), expected_policy)

    def test_quarantine_renamer(self):
        """A file is quarantined once; a second attempt raises OSError."""
        for policy in POLICIES:
            # we use this for convenience, not really about a diskfile layout
            df = self._create_diskfile(policy=policy)
            mkdirs(df._datadir)
            qbit = os.path.join(df._datadir, 'qbit')
            with open(qbit, 'w') as qbit_file:
                qbit_file.write('abc')
            expected_dir = os.path.join(
                self.devices, 'quarantined', diskfile.get_data_dir(policy),
                os.path.basename(df._datadir))
            actual_dir = diskfile.quarantine_renamer(self.devices, qbit)
            self.assertEqual(actual_dir, expected_dir)
            with self.assertRaises(OSError):
                diskfile.quarantine_renamer(self.devices, qbit)

    def test_get_data_dir(self):
        """get_data_dir appends '-<index>' for non-zero policies."""
        good = ((POLICIES[0], diskfile.DATADIR_BASE),
                (POLICIES[1], diskfile.DATADIR_BASE + "-1"))
        for policy, expected in good:
            self.assertEqual(diskfile.get_data_dir(policy), expected)
        for bad_policy in ('junk', 99):
            self.assertRaises(ValueError, diskfile.get_data_dir, bad_policy)

    def test_get_async_dir(self):
        """get_async_dir appends '-<index>' for non-zero policies."""
        good = ((POLICIES[0], diskfile.ASYNCDIR_BASE),
                (POLICIES[1], diskfile.ASYNCDIR_BASE + "-1"))
        for policy, expected in good:
            self.assertEqual(diskfile.get_async_dir(policy), expected)
        for bad_policy in ('junk', 99):
            self.assertRaises(ValueError, diskfile.get_async_dir, bad_policy)

    def test_get_tmp_dir(self):
        """get_tmp_dir appends '-<index>' for non-zero policies."""
        good = ((POLICIES[0], diskfile.TMP_BASE),
                (POLICIES[1], diskfile.TMP_BASE + "-1"))
        for policy, expected in good:
            self.assertEqual(diskfile.get_tmp_dir(policy), expected)
        for bad_policy in ('junk', 99):
            self.assertRaises(ValueError, diskfile.get_tmp_dir, bad_policy)

    def test_pickle_async_update_tmp_dir(self):
        """pickle_async_update needs the policy's tmp dir to exist already."""
        for policy in POLICIES:
            tmp_part = 'tmp' if int(policy) == 0 else 'tmp-%d' % policy
            tmp_path = os.path.join(
                self.devices, self.existing_device, tmp_part)
            self.assertFalse(os.path.isdir(tmp_path))
            pickle_args = (self.existing_device, 'a', 'c', 'o',
                           'data', 0.0, policy)
            # async updates don't create their tmpdir on their own
            with self.assertRaises(OSError):
                self.df_mgr.pickle_async_update(*pickle_args)
            os.makedirs(tmp_path)
            # now create a async update
            self.df_mgr.pickle_async_update(*pickle_args)
            # check tempdir
            self.assertTrue(os.path.isdir(tmp_path))


@patch_policies
class TestObjectAuditLocationGenerator(unittest.TestCase):
    """Tests for diskfile.object_audit_location_generator and friends."""

    def _make_file(self, path):
        """Create an empty file at ``path``, making parent dirs as needed."""
        try:
            os.makedirs(os.path.dirname(path))
        except OSError as err:
            # an already existing parent directory is fine
            if err.errno != errno.EEXIST:
                raise

        with open(path, 'w'):
            pass

    def test_audit_location_class(self):
        """str() of an AuditLocation is its first (path) argument."""
        al = diskfile.AuditLocation('abc', '123', '_-_',
                                    policy=POLICIES.legacy)
        self.assertEqual(str(al), 'abc')

    def test_finding_of_hashdirs(self):
        """Only hash dirs under valid policy data dirs are yielded."""
        with temptree([]) as tmpdir:
            # the good
            os.makedirs(os.path.join(tmpdir, "sdp", "objects", "1519", "aca",
                                     "5c1fdc1ffb12e5eaf84edc30d8b67aca"))
            os.makedirs(os.path.join(tmpdir, "sdp", "objects", "1519", "aca",
                                     "fdfd184d39080020bc8b487f8a7beaca"))
            os.makedirs(os.path.join(tmpdir, "sdp", "objects", "1519", "df2",
                                     "b0fe7af831cc7b1af5bf486b1c841df2"))
            os.makedirs(os.path.join(tmpdir, "sdp", "objects", "9720", "ca5",
                                     "4a943bc72c2e647c4675923d58cf4ca5"))
            os.makedirs(os.path.join(tmpdir, "sdq", "objects", "3071", "8eb",
                                     "fcd938702024c25fef6c32fef05298eb"))
            os.makedirs(os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5",
                                     "4a943bc72c2e647c4675923d58cf4ca5"))
            os.makedirs(os.path.join(tmpdir, "sdq", "objects-2", "9971", "8eb",
                                     "fcd938702024c25fef6c32fef05298eb"))
            os.makedirs(os.path.join(tmpdir, "sdq", "objects-99", "9972",
                                     "8eb",
                                     "fcd938702024c25fef6c32fef05298eb"))
            # the bad
            os.makedirs(os.path.join(tmpdir, "sdq", "objects-", "1135",
                                     "6c3",
                                     "fcd938702024c25fef6c32fef05298eb"))
            os.makedirs(os.path.join(tmpdir, "sdq", "objects-fud", "foo"))
            os.makedirs(os.path.join(tmpdir, "sdq", "objects-+1", "foo"))

            self._make_file(os.path.join(tmpdir, "sdp", "objects", "1519",
                                         "fed"))
            self._make_file(os.path.join(tmpdir, "sdq", "objects", "9876"))

            # the empty
            os.makedirs(os.path.join(tmpdir, "sdr"))
            os.makedirs(os.path.join(tmpdir, "sds", "objects"))
            os.makedirs(os.path.join(tmpdir, "sdt", "objects", "9601"))
            os.makedirs(os.path.join(tmpdir, "sdu", "objects", "6499", "f80"))

            # the irrelevant
            os.makedirs(os.path.join(tmpdir, "sdv", "accounts", "77", "421",
                                     "4b8c86149a6d532f4af018578fd9f421"))
            os.makedirs(os.path.join(tmpdir, "sdw", "containers", "28", "51e",
                                     "4f9eee668b66c6f0250bfa3c7ab9e51e"))

            logger = debug_logger()
            locations = [(loc.path, loc.device, loc.partition, loc.policy)
                         for loc in diskfile.object_audit_location_generator(
                             devices=tmpdir, mount_check=False,
                             logger=logger)]
            locations.sort()

            # expect some warnings about those bad dirs
            warnings = logger.get_lines_for_level('warning')
            self.assertEqual(set(warnings), set([
                ("Directory 'objects-' does not map to a valid policy "
                 "(Unknown policy, for index '')"),
                ("Directory 'objects-2' does not map to a valid policy "
                 "(Unknown policy, for index '2')"),
                ("Directory 'objects-99' does not map to a valid policy "
                 "(Unknown policy, for index '99')"),
                ("Directory 'objects-fud' does not map to a valid policy "
                 "(Unknown policy, for index 'fud')"),
                ("Directory 'objects-+1' does not map to a valid policy "
                 "(Unknown policy, for index '+1')"),
            ]))

            # listed in sorted-by-path order to match locations.sort() above
            expected = [
                (os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5",
                              "4a943bc72c2e647c4675923d58cf4ca5"),
                 "sdp", "9970", POLICIES[1]),
                (os.path.join(tmpdir, "sdp", "objects", "1519", "aca",
                              "5c1fdc1ffb12e5eaf84edc30d8b67aca"),
                 "sdp", "1519", POLICIES[0]),
                (os.path.join(tmpdir, "sdp", "objects", "1519", "aca",
                              "fdfd184d39080020bc8b487f8a7beaca"),
                 "sdp", "1519", POLICIES[0]),
                (os.path.join(tmpdir, "sdp", "objects", "1519", "df2",
                              "b0fe7af831cc7b1af5bf486b1c841df2"),
                 "sdp", "1519", POLICIES[0]),
                (os.path.join(tmpdir, "sdp", "objects", "9720", "ca5",
                              "4a943bc72c2e647c4675923d58cf4ca5"),
                 "sdp", "9720", POLICIES[0]),
                (os.path.join(tmpdir, "sdq", "objects", "3071", "8eb",
                              "fcd938702024c25fef6c32fef05298eb"),
                 "sdq", "3071", POLICIES[0]),
            ]
            self.assertEqual(locations, expected)

            # Reset status file for next run
            diskfile.clear_auditor_status(tmpdir)

            # now without a logger
            locations = [(loc.path, loc.device, loc.partition, loc.policy)
                         for loc in diskfile.object_audit_location_generator(
                             devices=tmpdir, mount_check=False)]
            locations.sort()
            self.assertEqual(locations, expected)

    def test_skipping_unmounted_devices(self):
        """With mount_check on, devices that are not mounted are skipped."""
        def mock_ismount(path):
            # only 'sdp' is reported as mounted
            return path.endswith('sdp')

        with mock.patch('swift.obj.diskfile.ismount', mock_ismount):
            with temptree([]) as tmpdir:
                os.makedirs(os.path.join(tmpdir, "sdp", "objects",
                                         "2607", "df3",
                                         "ec2871fe724411f91787462f97d30df3"))
                os.makedirs(os.path.join(tmpdir, "sdq", "objects",
                                         "9785", "a10",
                                         "4993d582f41be9771505a8d4cb237a10"))

                locations = [
                    (loc.path, loc.device, loc.partition, loc.policy)
                    for loc in diskfile.object_audit_location_generator(
                        devices=tmpdir, mount_check=True)]
                locations.sort()

                self.assertEqual(
                    locations,
                    [(os.path.join(tmpdir, "sdp", "objects",
                                   "2607", "df3",
                                   "ec2871fe724411f91787462f97d30df3"),
                      "sdp", "2607", POLICIES[0])])

                # Do it again, this time with a logger.
                ml = mock.MagicMock()
                locations = [
                    (loc.path, loc.device, loc.partition, loc.policy)
                    for loc in diskfile.object_audit_location_generator(
                        devices=tmpdir, mount_check=True, logger=ml)]
                ml.debug.assert_called_once_with(
                    'Skipping %s as it is not mounted',
                    'sdq')

    def test_skipping_files(self):
        """Plain files in the devices dir are skipped, not treated as devs."""
        with temptree([]) as tmpdir:
            os.makedirs(os.path.join(tmpdir, "sdp", "objects",
                                     "2607", "df3",
                                     "ec2871fe724411f91787462f97d30df3"))
            # the file is opened in binary mode, so write bytes: a text ''
            # raises TypeError on python3
            with open(os.path.join(tmpdir, "garbage"), "wb") as fh:
                fh.write(b'')

            locations = [
                (loc.path, loc.device, loc.partition, loc.policy)
                for loc in diskfile.object_audit_location_generator(
                    devices=tmpdir, mount_check=False)]

            self.assertEqual(
                locations,
                [(os.path.join(tmpdir, "sdp", "objects",
                               "2607", "df3",
                               "ec2871fe724411f91787462f97d30df3"),
                  "sdp", "2607", POLICIES[0])])

            # Do it again, this time with a logger.
            ml = mock.MagicMock()
            locations = [
                (loc.path, loc.device, loc.partition, loc.policy)
                for loc in diskfile.object_audit_location_generator(
                    devices=tmpdir, mount_check=False, logger=ml)]
            ml.debug.assert_called_once_with(
                'Skipping %s: Not a directory' %
                os.path.join(tmpdir, "garbage"))

    def test_only_catch_expected_errors(self):
        """Unexpected OSErrors from os.listdir must propagate."""
        # Crazy exceptions should still escape object_audit_location_generator
        # so that errors get logged and a human can see what's going wrong;
        # only normal FS corruption should be skipped over silently.

        def list_locations(dirname):
            return [(loc.path, loc.device, loc.partition, loc.policy)
                    for loc in diskfile.object_audit_location_generator(
                        devices=dirname, mount_check=False)]

        real_listdir = os.listdir

        def splode_if_endswith(suffix):
            """Return an os.listdir stand-in failing on matching paths."""
            def sploder(path):
                if path.endswith(suffix):
                    raise OSError(errno.EACCES, "don't try to ad-lib")
                else:
                    return real_listdir(path)
            return sploder

        with temptree([]) as tmpdir:
            os.makedirs(os.path.join(tmpdir, "sdf", "objects",
                                     "2607", "b54",
                                     "fe450ec990a88cc4b252b181bab04b54"))
            # fail at the data dir, the partition and the suffix level in turn
            with mock.patch('os.listdir', splode_if_endswith("sdf/objects")):
                self.assertRaises(OSError, list_locations, tmpdir)
            with mock.patch('os.listdir', splode_if_endswith("2607")):
                self.assertRaises(OSError, list_locations, tmpdir)
            with mock.patch('os.listdir', splode_if_endswith("b54")):
                self.assertRaises(OSError, list_locations, tmpdir)

    def test_auditor_status(self):
        """A restarted generator resumes until the status is cleared."""
        with temptree([]) as tmpdir:
            os.makedirs(os.path.join(tmpdir, "sdf", "objects", "1", "a", "b"))
            os.makedirs(os.path.join(tmpdir, "sdf", "objects", "2", "a", "b"))

            # Auditor starts, there are two partitions to check.
            # next(gen) is used rather than gen.next() so the test runs on
            # both python2 and python3.
            gen = diskfile.object_audit_location_generator(tmpdir, False)
            next(gen)
            next(gen)

            # Auditor stopped for some reason without raising StopIteration
            # in the generator and restarts. There is now only one remaining
            # partition to check
            gen = diskfile.object_audit_location_generator(tmpdir, False)
            next(gen)

            # There are no more remaining partitions
            self.assertRaises(StopIteration, next, gen)

            # There are no partitions to check if the auditor restarts another
            # time and the status files have not been cleared
            gen = diskfile.object_audit_location_generator(tmpdir, False)
            self.assertRaises(StopIteration, next, gen)

            # Reset status file
            diskfile.clear_auditor_status(tmpdir)

            # If the auditor restarts another time, we expect to
            # check two partitions again, because the remaining
            # partitions were empty and a new listdir was executed
            gen = diskfile.object_audit_location_generator(tmpdir, False)
            next(gen)
            next(gen)


class TestDiskFileRouter(unittest.TestCase):
    """Tests for registering manager classes with DiskFileRouter."""

    def test_register(self):
        """The router returns the manager registered for a policy type."""
        # mock.patch.dict puts policy_type_to_manager_cls back to its
        # previous contents on exit, so the manager registered below does
        # not stay in the router's mapping after this test
        with mock.patch.dict(
                diskfile.DiskFileRouter.policy_type_to_manager_cls, {}):
            @diskfile.DiskFileRouter.register('test-policy')
            class TestDiskFileManager(diskfile.DiskFileManager):
                pass

            @BaseStoragePolicy.register('test-policy')
            class TestStoragePolicy(BaseStoragePolicy):
                pass

            with patch_policies([TestStoragePolicy(0, 'test')]):
                router = diskfile.DiskFileRouter({}, debug_logger('test'))
                manager = router[POLICIES.default]
                # assertIsInstance reports the actual type on failure,
                # unlike assertTrue(isinstance(...))
                self.assertIsInstance(manager, TestDiskFileManager)


class BaseDiskFileTestMixin(object):
    """
    Bag of helpers that are useful in the per-policy DiskFile test classes.
    """

    def _manager_mock(self, manager_attribute_name, df=None):
        """
        Build the dotted '<module>.<class>.<attribute>' name of an attribute
        on a manager class.

        :param manager_attribute_name: name of the attribute on the manager
        :param df: optional diskfile; when truthy the class of its
                   ``_manager`` is used, otherwise ``self.mgr_cls``
        :returns: the dotted name as a string
        """
        if df:
            mgr_cls = df._manager.__class__
        else:
            mgr_cls = self.mgr_cls
        name_parts = (mgr_cls.__module__, mgr_cls.__name__,
                      manager_attribute_name)
        return '.'.join(name_parts)

    def _assertDictContainsSubset(self, subset, dictionary, msg=None):
        """Checks whether dictionary is a superset of subset."""
        # This is almost identical to the method in python3.4 version of
        # unittest.case.TestCase.assertDictContainsSubset, reproduced here to
        # avoid the deprecation warning in the original when using python3.
        missing = [key for key in subset if key not in dictionary]
        mismatched = [
            '%s, expected: %s, actual: %s' % (
                safe_repr(key), safe_repr(value),
                safe_repr(dictionary[key]))
            for key, value in subset.items()
            if key in dictionary and value != dictionary[key]]

        if not missing and not mismatched:
            return

        # report missing keys first, then mismatched values
        problems = []
        if missing:
            problems.append(
                'Missing: %s' % ','.join(map(safe_repr, missing)))
        if mismatched:
            problems.append(
                'Mismatched values: %s' % ','.join(mismatched))

        self.fail(self._formatMessage(msg, '; '.join(problems)))


class DiskFileManagerMixin(BaseDiskFileTestMixin):
    """
    Abstract test method mixin for concrete test cases - this class
    won't get picked up by test runners because it doesn't subclass
    unittest.TestCase and doesn't have [Tt]est in the name.
    """

    # set mgr_cls on subclasses
    mgr_cls = None

    def setUp(self):
        """
        Create per-policy tmp dirs on two devices, make tpool.execute run
        its callable inline and build the manager and router under test.
        """
        self.tmpdir = mkdtemp()
        self.testdir = os.path.join(
            self.tmpdir, 'tmp_test_obj_server_DiskFile')
        self.existing_device1 = 'sda1'
        self.existing_device2 = 'sda2'
        for policy in POLICIES:
            for device in (self.existing_device1, self.existing_device2):
                mkdirs(os.path.join(self.testdir, device,
                                    diskfile.get_tmp_dir(policy)))

        def call_directly(f, *args, **kwargs):
            # stand-in for tpool.execute: just call f in the current thread
            return f(*args, **kwargs)

        # keep a reference to the real tpool.execute before replacing it
        self._orig_tpool_exc = tpool.execute
        tpool.execute = call_directly
        self.conf = {
            'devices': self.testdir,
            'mount_check': 'false',
            'keep_cache_size': 2 * 1024,
        }
        self.logger = debug_logger('test-' + self.__class__.__name__)
        self.df_mgr = self.mgr_cls(self.conf, self.logger)
        self.df_router = diskfile.DiskFileRouter(self.conf, self.logger)

    def tearDown(self):
        """
        Undo setUp: put back the real tpool.execute and remove the temp dir.
        """
        # setUp replaced tpool.execute with an inline pass-through and saved
        # the original; restore it so the patch does not leak into other
        # tests running in the same process
        tpool.execute = self._orig_tpool_exc
        rmtree(self.tmpdir, ignore_errors=1)

    def _get_diskfile(self, policy, frag_index=None):
        df_mgr = self.df_router[policy]
        return df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                   policy=policy, frag_index=frag_index)

    def _test_get_ondisk_files(self, scenarios, policy,
                               frag_index=None):
        """
        Check a diskfile's ``_get_ondisk_files`` against a list of scenarios.

        Each scenario is a list of tuples whose first two items are a
        filename and either its expected extension or False. Files whose
        extension is '.data', '.meta' or '.ts' must be reported under the
        matching '<ext>_file' key. Every scenario is tried three times: once
        in the given order and twice after shuffling the file list.

        :param scenarios: list of scenarios as described above
        :param policy: policy passed to ``self._get_diskfile``
        :param frag_index: frag index passed to ``self._get_diskfile``
        """
        class_under_test = self._get_diskfile(policy, frag_index=frag_index)
        for test in scenarios:
            # test => [('filename.ext', '.ext'|False, ...), ...]
            expected = {}
            for spec in test:
                filename, ext = spec[:2]
                if ext in ('.data', '.meta', '.ts'):
                    expected[ext[1:] + '_file'] = os.path.join(
                        class_under_test._datadir, filename)
            # list(zip(...)) for py3 compatibility (zip is lazy there)
            columns = list(zip(*test))
            files = list(columns[0])

            # first pass uses the given order, the next two a shuffled one
            for _attempt in range(3):
                class_under_test = self._get_diskfile(policy, frag_index)
                try:
                    actual = class_under_test._get_ondisk_files(files)
                    self._assertDictContainsSubset(
                        expected, actual,
                        'Expected %s from %s but got %s'
                        % (expected, files, actual))
                except AssertionError as err:
                    self.fail('%s with files %s' % (str(err), files))
                shuffle(files)

    def _test_cleanup_ondisk_files_files(self, scenarios, policy,
                                         reclaim_age=None):
        """
        Check which files are left in a hash dir after cleanup_ondisk_files.

        Each scenario is a list of tuples. The first item is a filename; the
        file is expected to survive cleanup if the third item is truthy or,
        for two-item tuples, if the second item is truthy.

        :param scenarios: list of scenarios as described above
        :param policy: key used to pick the manager from ``self.df_router``
        :param reclaim_age: if truthy, passed to ``cleanup_ondisk_files``;
                            otherwise ``swift.obj.diskfile.time`` is mocked
                            so that ``time.time()`` returns 0.0
        """
        # check that expected files are left in hashdir after cleanup
        for test in scenarios:
            class_under_test = self.df_router[policy]
            # list(zip(...)) for py3 compatibility (zip is lazy there)
            files = list(list(zip(*test))[0])
            hashdir = os.path.join(self.testdir, str(uuid.uuid4()))
            os.mkdir(hashdir)
            for fname in files:
                # create an empty file; the with-block closes the handle
                # immediately instead of leaking it until garbage collection
                with open(os.path.join(hashdir, fname), 'w'):
                    pass
            expected_after_cleanup = set([f[0] for f in test
                                          if (f[2] if len(f) > 2 else f[1])])
            if reclaim_age:
                class_under_test.cleanup_ondisk_files(
                    hashdir, reclaim_age=reclaim_age)
            else:
                with mock.patch('swift.obj.diskfile.time') as mock_time:
                    # don't reclaim anything
                    mock_time.time.return_value = 0.0
                    class_under_test.cleanup_ondisk_files(hashdir)
            after_cleanup = set(os.listdir(hashdir))
            errmsg = "expected %r, got %r for test %r" % (
                sorted(expected_after_cleanup), sorted(after_cleanup), test
            )
            self.assertEqual(expected_after_cleanup, after_cleanup, errmsg)

    def _test_yield_hashes_cleanup(self, scenarios, policy):
        """
        Check which files are left in a hash dir after running yield_hashes.

        Each scenario is a list of tuples. The first item is a filename; the
        file is expected to survive if the second item is truthy, or if a
        third item exists and is truthy.

        :param scenarios: list of scenarios as described above
        :param policy: key used to pick the manager from ``self.df_router``;
                       also passed to ``get_data_dir`` and ``yield_hashes``
        """
        # opportunistic test to check that yield_hashes cleans up dir using
        # same scenarios as passed to _test_cleanup_ondisk_files_files
        for test in scenarios:
            class_under_test = self.df_router[policy]
            # list(zip(...)) for py3 compatibility (zip is lazy there)
            files = list(list(zip(*test))[0])
            dev_path = os.path.join(self.testdir, str(uuid.uuid4()))
            hashdir = os.path.join(
                dev_path, diskfile.get_data_dir(policy),
                '0', 'abc', '9373a92d072897b136b3fc06595b4abc')
            os.makedirs(hashdir)
            for fname in files:
                # create an empty file; the with-block closes the handle
                # immediately instead of leaking it until garbage collection
                with open(os.path.join(hashdir, fname), 'w'):
                    pass
            expected_after_cleanup = set([f[0] for f in test
                                          if f[1] or (len(f) > 2 and f[2])])
            with mock.patch('swift.obj.diskfile.time') as mock_time:
                # don't reclaim anything
                mock_time.time.return_value = 0.0
                mocked = 'swift.obj.diskfile.BaseDiskFileManager.get_dev_path'
                with mock.patch(mocked) as mock_path:
                    # point the manager at the device dir created above
                    mock_path.return_value = dev_path
                    for _ in class_under_test.yield_hashes(
                            'ignored', '0', policy, suffixes=['abc']):
                        # return values are tested in test_yield_hashes_*
                        pass
            after_cleanup = set(os.listdir(hashdir))
            errmsg = "expected %r, got %r for test %r" % (
                sorted(expected_after_cleanup), sorted(after_cleanup), test
            )
            self.assertEqual(expected_after_cleanup, after_cleanup, errmsg)

    def test_get_ondisk_files_with_empty_dir(self):
        """With no files, every '*_file' entry is None and open() fails."""
        no_files = []
        expected = {
            'data_file': None,
            'meta_file': None,
            'ctype_file': None,
            'ts_file': None,
        }
        frag_indexes = (0, None, '14')
        for policy, frag_index in itertools.product(POLICIES, frag_indexes):
            # check manager
            manager = self.df_router[policy]
            datadir = os.path.join('/srv/node/sdb1/',
                                   diskfile.get_data_dir(policy))
            from_manager = manager.get_ondisk_files(no_files, datadir)
            self._assertDictContainsSubset(expected, from_manager)
            # check diskfile under the hood
            df = self._get_diskfile(policy, frag_index=frag_index)
            from_diskfile = df._get_ondisk_files(no_files)
            self._assertDictContainsSubset(expected, from_diskfile)
            # check diskfile open
            with self.assertRaises(DiskFileNotExist):
                df.open()

    def test_get_ondisk_files_with_unexpected_file(self):
        """An unexpected file is warned about; the tombstone is still found."""
        timestamp = next(make_timestamp_iter())
        tomb_file = timestamp.internal + '.ts'
        for policy in POLICIES:
            for unexpected in ('junk', 'junk.data', '.junk'):
                manager = self.df_router[policy]
                # fresh logger so only this call's warnings are captured
                manager.logger = FakeLogger()
                datadir = os.path.join('/srv/node/sdb1/',
                                       diskfile.get_data_dir(policy))

                results = manager.get_ondisk_files(
                    [unexpected, tomb_file], datadir)

                self._assertDictContainsSubset(
                    {'ts_file': os.path.join(datadir, tomb_file)}, results)

                warnings = manager.logger.get_lines_for_level('warning')
                expected_prefix = ('Unexpected file %s'
                                   % os.path.join(datadir, unexpected))
                self.assertTrue(warnings[0].startswith(expected_prefix))

    def test_cleanup_ondisk_files_reclaim_non_data_files(self):
        # Each scenario specifies a list of (filename, extension, [survives])
        # tuples. If extension is set or 'survives' is True, the filename
        # should still be in the dir after cleanup.
        def stamp(age):
            # internal-format timestamp for a moment `age` seconds ago
            return Timestamp(time() - age).internal

        much_older = stamp(2000)
        older = stamp(1001)
        newer = stamp(900)

        very_stale_ts = '%s.ts' % much_older
        stale_ts = '%s.ts' % older
        fresh_ts = '%s.ts' % newer
        stale_meta = '%s.meta' % older
        fresh_meta = '%s.meta' % newer

        scenarios = [
            [(stale_ts, False, False)],

            # fresh tombstone is preserved
            [(fresh_ts, '.ts', True)],

            # tombstone reclaimed despite junk file
            [('junk', False, True),
             (very_stale_ts, '.ts', False)],

            # fresh .meta not reclaimed even if isolated
            [(fresh_meta, '.meta')],

            # fresh .meta not reclaimed when tombstone is reclaimed
            [(fresh_meta, '.meta'),
             (stale_ts, False, False)],

            # stale isolated .meta is reclaimed
            [(stale_meta, False, False)],

            # stale .meta is reclaimed along with tombstone
            [(stale_meta, False, False),
             (stale_ts, False, False)],
        ]

        self._test_cleanup_ondisk_files_files(
            scenarios, POLICIES.default, reclaim_age=1000)

    def test_construct_dev_path(self):
        # construct_dev_path should join the manager's devices root and the
        # device name
        expected_path = os.path.join(self.df_mgr.devices, 'abc')
        self.assertEqual(expected_path,
                         self.df_mgr.construct_dev_path('abc'))

    def test_pickle_async_update(self):
        # pickle_async_update should hand the update dict to write_pickle
        # with the async pending path and tmp dir, then bump the
        # 'async_pendings' counter.
        policy = POLICIES[0]
        device = self.existing_device1
        self.df_mgr.logger.increment = mock.MagicMock()
        ts = Timestamp(10000.0).internal
        with mock.patch('swift.obj.diskfile.write_pickle') as write_pickle:
            self.df_mgr.pickle_async_update(
                device, 'a', 'c', 'o', dict(a=1, b=2), ts, policy)
            dev_path = self.df_mgr.construct_dev_path(device)
            ohash = diskfile.hash_path('a', 'c', 'o')
            async_file = os.path.join(
                dev_path, diskfile.get_async_dir(policy),
                ohash[-3:], ohash + '-' + ts)
            write_pickle.assert_called_with(
                {'a': 1, 'b': 2}, async_file, os.path.join(dev_path, 'tmp'))
        self.df_mgr.logger.increment.assert_called_with('async_pendings')

    def test_object_audit_location_generator(self):
        # the generator is expected to yield no locations in this fixture
        self.assertEqual(
            list(self.df_mgr.object_audit_location_generator()), [])

    def test_replication_lock_on(self):
        # With one-per-device locking enabled, a nested lock on the same
        # device must time out.
        # Double check settings
        self.df_mgr.replication_one_per_device = True
        self.df_mgr.replication_lock_timeout = 0.1
        dev_path = os.path.join(self.testdir, self.existing_device1)
        with self.df_mgr.replication_lock(self.existing_device1):
            timed_out = None
            unexpected = None
            try:
                with self.df_mgr.replication_lock(self.existing_device1):
                    # only reached if the nested lock was granted
                    raise Exception(
                        '%r was not replication locked!' % dev_path)
            except ReplicationLockTimeout as err:
                timed_out = err
            except Exception as err:
                unexpected = err
            self.assertIsNotNone(timed_out)
            self.assertIsNone(unexpected)

    def test_replication_lock_off(self):
        # With one-per-device locking disabled, a nested lock must be
        # granted, so the marker exception raised inside it is what we see.
        # Double check settings
        self.df_mgr.replication_one_per_device = False
        self.df_mgr.replication_lock_timeout = 0.1
        dev_path = os.path.join(self.testdir, self.existing_device1)
        with self.df_mgr.replication_lock(dev_path):
            timed_out = None
            marker = None
            try:
                with self.df_mgr.replication_lock(dev_path):
                    # reaching here proves the nested lock was granted
                    raise Exception(
                        '%r was not replication locked!' % dev_path)
            except ReplicationLockTimeout as err:
                timed_out = err
            except Exception as err:
                marker = err
            self.assertIsNone(timed_out)
            self.assertIsNotNone(marker)

    def test_replication_lock_another_device_fine(self):
        # Holding the lock for one device must not block locking another.
        # Double check settings
        self.df_mgr.replication_one_per_device = True
        self.df_mgr.replication_lock_timeout = 0.1
        with self.df_mgr.replication_lock(self.existing_device1):
            timed_out = None
            try:
                with self.df_mgr.replication_lock(self.existing_device2):
                    pass
            except ReplicationLockTimeout as err:
                timed_out = err
            self.assertIsNone(timed_out)

    def test_missing_splice_warning(self):
        # When splice is requested in the conf but the C splice function is
        # patched out, the manager should log a warning mentioning splice()
        # and leave use_splice falsy.
        fake_logger = FakeLogger()
        with mock.patch('swift.common.splice.splice._c_splice', None):
            self.conf['splice'] = 'yes'
            mgr = diskfile.DiskFileManager(self.conf, fake_logger)

        warning_lines = fake_logger.get_lines_for_level('warning')
        self.assertTrue(len(warning_lines) > 0)
        self.assertIn('splice()', warning_lines[-1])
        self.assertFalse(mgr.use_splice)

    def test_get_diskfile_from_hash_dev_path_fail(self):
        # get_dev_path returning None must surface as
        # DiskFileDeviceUnavailable
        self.df_mgr.get_dev_path = mock.MagicMock(return_value=None)
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.return_value = {'files': ['1381679759.90941.data']}
            readmeta.return_value = {'name': '/a/c/o'}
            with self.assertRaises(DiskFileDeviceUnavailable):
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])

    def test_get_diskfile_from_hash_not_dir(self):
        # ENOTDIR from cleanup_ondisk_files: expect DiskFileNotExist and a
        # single quarantine_renamer call for the hash path
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        not_a_dir = OSError()
        not_a_dir.errno = errno.ENOTDIR
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta, \
                mock.patch(self._manager_mock(
                    'quarantine_renamer')) as quarantine_renamer:
            cleanup.side_effect = not_a_dir
            readmeta.return_value = {'name': '/a/c/o'}
            with self.assertRaises(DiskFileNotExist):
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])
            quarantine_renamer.assert_called_once_with(
                '/srv/dev/',
                '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900')

    def test_get_diskfile_from_hash_no_dir(self):
        # ENOENT from cleanup_ondisk_files must surface as DiskFileNotExist
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        missing = OSError()
        missing.errno = errno.ENOENT
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.side_effect = missing
            readmeta.return_value = {'name': '/a/c/o'}
            with self.assertRaises(DiskFileNotExist):
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])

    def test_get_diskfile_from_hash_other_oserror(self):
        # an OSError with no errno set must propagate unchanged
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.side_effect = OSError()
            readmeta.return_value = {'name': '/a/c/o'}
            with self.assertRaises(OSError):
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])

    def test_get_diskfile_from_hash_no_actual_files(self):
        # an empty 'files' list from cleanup must surface as
        # DiskFileNotExist
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.return_value = {'files': []}
            readmeta.return_value = {'name': '/a/c/o'}
            with self.assertRaises(DiskFileNotExist):
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])

    def test_get_diskfile_from_hash_read_metadata_problem(self):
        # an EOFError from read_metadata must surface as DiskFileNotExist
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.return_value = {'files': ['1381679759.90941.data']}
            readmeta.side_effect = EOFError()
            with self.assertRaises(DiskFileNotExist):
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])

    def test_get_diskfile_from_hash_no_meta_name(self):
        # Metadata without a 'name' key must surface as DiskFileNotExist
        # with an empty message.
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.return_value = {'files': ['1381679759.90941.data']}
            readmeta.return_value = {}
            # assertRaises reports a missing exception as a test failure
            # instead of leaving the captured exception unbound
            with self.assertRaises(DiskFileNotExist) as cm:
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])
        self.assertEqual(str(cm.exception), '')

    def test_get_diskfile_from_hash_bad_meta_name(self):
        # Metadata whose 'name' is not a valid object path must surface as
        # DiskFileNotExist with an empty message.
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        with mock.patch(self._manager_mock('diskfile_cls')), \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.return_value = {'files': ['1381679759.90941.data']}
            readmeta.return_value = {'name': 'bad'}
            # assertRaises reports a missing exception as a test failure
            # instead of leaving the captured exception unbound
            with self.assertRaises(DiskFileNotExist) as cm:
                self.df_mgr.get_diskfile_from_hash(
                    'dev', '9', '9a7175077c01a23ade5956b8a2bba900',
                    POLICIES[0])
        self.assertEqual(str(cm.exception), '')

    def test_get_diskfile_from_hash(self):
        # Happy path: the hash dir is cleaned up, metadata is read from the
        # listed data file and the diskfile class is constructed from the
        # account/container/object parsed out of the metadata name.
        hash_dir = '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900'
        data_name = '1381679759.90941.data'
        self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
        with mock.patch(self._manager_mock('diskfile_cls')) as dfclass, \
                mock.patch(self._manager_mock(
                    'cleanup_ondisk_files')) as cleanup, \
                mock.patch('swift.obj.diskfile.read_metadata') as readmeta:
            cleanup.return_value = {'files': [data_name]}
            readmeta.return_value = {'name': '/a/c/o'}
            self.df_mgr.get_diskfile_from_hash(
                'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0])
            dfclass.assert_called_once_with(
                self.df_mgr, '/srv/dev/', '9',
                'a', 'c', 'o', policy=POLICIES[0])
            cleanup.assert_called_once_with(hash_dir, 604800)
            readmeta.assert_called_once_with(hash_dir + '/' + data_name)

    def test_listdir_enoent(self):
        # ENOENT from os.listdir gives an empty listing and logs no error
        enoent = OSError()
        enoent.errno = errno.ENOENT
        self.df_mgr.logger.error = mock.MagicMock()
        with mock.patch('os.listdir', side_effect=enoent):
            listing = self.df_mgr._listdir('path')
            self.assertEqual(listing, [])
            self.assertEqual(self.df_mgr.logger.error.mock_calls, [])

    def test_listdir_other_oserror(self):
        # any other OSError gives an empty listing and is logged once as an
        # error
        failure = OSError()
        self.df_mgr.logger.error = mock.MagicMock()
        with mock.patch('os.listdir', side_effect=failure):
            listing = self.df_mgr._listdir('path')
            self.assertEqual(listing, [])
            self.df_mgr.logger.error.assert_called_once_with(
                'ERROR: Skipping %r due to error with listdir attempt: %s',
                'path', failure)

    def test_listdir(self):
        # a successful os.listdir is passed through and logs no error
        entries = ['abc', 'def']
        self.df_mgr.logger.error = mock.MagicMock()
        with mock.patch('os.listdir', return_value=entries):
            listing = self.df_mgr._listdir('path')
            self.assertEqual(listing, ['abc', 'def'])
            self.assertEqual(self.df_mgr.logger.error.mock_calls, [])

    def test_yield_suffixes_dev_path_fail(self):
        # get_dev_path returning None must surface as
        # DiskFileDeviceUnavailable with an empty message. A real policy is
        # passed, as in the other yield_suffixes test, and assertRaises
        # reports a missing exception directly.
        self.df_mgr.get_dev_path = mock.MagicMock(return_value=None)
        with self.assertRaises(DiskFileDeviceUnavailable) as cm:
            list(self.df_mgr.yield_suffixes(
                self.existing_device1, '9', POLICIES[0]))
        self.assertEqual(str(cm.exception), '')

    def test_yield_suffixes(self):
        # of the listed entries only 'abc', 'def' and '012' are expected to
        # be yielded, each paired with its path under the partition
        self.df_mgr._listdir = mock.MagicMock(return_value=[
            'abc', 'def', 'ghi', 'abcd', '012'])
        dev = self.existing_device1
        expected_pairs = [
            ('%s/%s/objects/9/%s' % (self.testdir, dev, suffix), suffix)
            for suffix in ('abc', 'def', '012')]
        yielded = list(self.df_mgr.yield_suffixes(dev, '9', POLICIES[0]))
        self.assertEqual(yielded, expected_pairs)

    def test_yield_hashes_dev_path_fail(self):
        # get_dev_path returning None must surface as
        # DiskFileDeviceUnavailable with an empty message
        self.df_mgr.get_dev_path = mock.MagicMock(return_value=None)
        with self.assertRaises(DiskFileDeviceUnavailable) as cm:
            list(self.df_mgr.yield_hashes(
                self.existing_device1, '9', POLICIES[0]))
        self.assertEqual(str(cm.exception), '')

    def test_yield_hashes_empty(self):
        # when every directory listing is empty, nothing is yielded
        with mock.patch('os.listdir', lambda path: []):
            yielded = list(self.df_mgr.yield_hashes(
                self.existing_device1, '9', POLICIES[0]))
            self.assertEqual(yielded, [])

    def test_yield_hashes_empty_suffixes(self):
        # an explicitly requested suffix whose listing is empty yields
        # nothing
        with mock.patch('os.listdir', lambda path: []):
            yielded = list(self.df_mgr.yield_hashes(
                self.existing_device1, '9', POLICIES[0],
                suffixes=['456']))
            self.assertEqual(yielded, [])

    def _check_yield_hashes(self, policy, suffix_map, expected, **kwargs):
        """
        Run yield_hashes against a faked partition and compare the result.

        os.listdir is patched to serve listings from suffix_map and
        os.unlink is patched out, so nothing touches the real filesystem.

        :param policy: storage policy used to select the manager and data dir
        :param suffix_map: dict of suffix -> {object hash -> [filenames]}
                           describing the faked contents of partition '9'
        :param expected: dict of object hash -> timestamps dict that
                         yield_hashes is expected to yield
        :param kwargs: passed through to yield_hashes
        """
        device = self.existing_device1
        part = '9'
        part_path = os.path.join(
            self.testdir, device, diskfile.get_data_dir(policy), part)

        def _listdir(path):
            # Like the real os.listdir, always return a new list: callers
            # may sort or otherwise mutate a listing, and that must not
            # alter the caller's suffix_map (which may be reused).
            if path == part_path:
                return list(suffix_map)
            for suff, hash_map in suffix_map.items():
                if path == os.path.join(part_path, suff):
                    return list(hash_map)
                for hash_, files in hash_map.items():
                    if path == os.path.join(part_path, suff, hash_):
                        return list(files)
            self.fail('Unexpected listdir of %r' % path)

        expected_items = sorted(
            (os.path.join(part_path, hash_[-3:], hash_), hash_, timestamps)
            for hash_, timestamps in expected.items())
        with mock.patch('os.listdir', _listdir), \
                mock.patch('os.unlink'):
            df_mgr = self.df_router[policy]
            hash_items = sorted(df_mgr.yield_hashes(
                device, part, policy, **kwargs))
        # default list diff easiest to debug
        self.assertEqual(hash_items, expected_items)

    def test_yield_hashes_tombstones(self):
        # Only hashes under the requested suffixes ('27e' and '00b') are
        # expected, each reported with its newest tombstone timestamp.
        ts_iter = (Timestamp(t) for t in itertools.count(int(time())))
        ts1, ts2, ts3 = [next(ts_iter) for _ in range(3)]
        tomb1, tomb2, tomb3 = [
            ts.internal + '.ts' for ts in (ts1, ts2, ts3)]
        suffix_map = {
            '27e': {
                '1111111111111111111111111111127e': [tomb1],
                '2222222222222222222222222222227e': [tomb2],
            },
            'd41': {
                'aaaaaaaaaaaaaaaaaaaaaaaaaaaaad41': [],
            },
            'd98': {},
            '00b': {
                '3333333333333333333333333333300b': [tomb1, tomb2, tomb3],
            },
            '204': {
                'bbbbbbbbbbbbbbbbbbbbbbbbbbbbb204': [tomb3],
            },
        }
        expected = {
            '1111111111111111111111111111127e': {'ts_data': ts1.internal},
            '2222222222222222222222222222227e': {'ts_data': ts2.internal},
            '3333333333333333333333333333300b': {'ts_data': ts3.internal},
        }
        for policy in POLICIES:
            self._check_yield_hashes(
                policy, suffix_map, expected, suffixes=['27e', '00b'])


@patch_policies
class TestDiskFileManager(DiskFileManagerMixin, unittest.TestCase):
    # Runs the shared DiskFileManagerMixin tests against
    # diskfile.DiskFileManager and adds tests specific to that manager.

    mgr_cls = diskfile.DiskFileManager

    def test_get_ondisk_files_with_repl_policy(self):
        # Each scenario specifies a list of (filename, extension, [survives])
        # tuples. If extension is set then that filename should be returned
        # by the method under test for that extension type. Some tuples
        # carry an optional third 'survives' element which is interpreted by
        # the cleanup helpers the scenarios are passed to below.
        scenarios = [[('0000000007.00000.data', '.data')],

                     [('0000000007.00000.ts', '.ts')],

                     # older tombstone is ignored
                     [('0000000007.00000.ts', '.ts'),
                      ('0000000006.00000.ts', False)],

                     # older data is ignored
                     [('0000000007.00000.data', '.data'),
                      ('0000000006.00000.data', False),
                      ('0000000004.00000.ts', False)],

                     # newest meta trumps older meta
                     [('0000000009.00000.meta', '.meta'),
                      ('0000000008.00000.meta', False),
                      ('0000000007.00000.data', '.data'),
                      ('0000000004.00000.ts', False)],

                     # meta older than data is ignored
                     [('0000000007.00000.data', '.data'),
                      ('0000000006.00000.meta', False),
                      ('0000000004.00000.ts', False)],

                     # meta without data is ignored
                     [('0000000007.00000.meta', False, True),
                      ('0000000006.00000.ts', '.ts'),
                      ('0000000004.00000.data', False)],

                     # tombstone trumps meta and data at same timestamp
                     [('0000000006.00000.meta', False),
                      ('0000000006.00000.ts', '.ts'),
                      ('0000000006.00000.data', False)],
                     ]

        self._test_get_ondisk_files(scenarios, POLICIES[0], None)
        self._test_cleanup_ondisk_files_files(scenarios, POLICIES[0])
        self._test_yield_hashes_cleanup(scenarios, POLICIES[0])

    def test_get_ondisk_files_with_stray_meta(self):
        # get_ondisk_files ignores a stray .meta file, so a diskfile whose
        # directory lists only a .meta cannot be opened

        class_under_test = self._get_diskfile(POLICIES[0])
        files = ['0000000007.00000.meta']

        with mock.patch('swift.obj.diskfile.os.listdir', lambda *args: files):
            self.assertRaises(DiskFileNotExist, class_under_test.open)

    def test_verify_ondisk_files(self):
        # ._verify_ondisk_files should only return False if get_ondisk_files
        # has produced a bad set of files due to a bug, so to test it we need
        # to probe it directly.
        mgr = self.df_router[POLICIES.default]
        ok_scenarios = (
            {'ts_file': None, 'data_file': None, 'meta_file': None},
            {'ts_file': None, 'data_file': 'a_file', 'meta_file': None},
            {'ts_file': None, 'data_file': 'a_file', 'meta_file': 'a_file'},
            {'ts_file': 'a_file', 'data_file': None, 'meta_file': None},
        )

        for scenario in ok_scenarios:
            self.assertTrue(mgr._verify_ondisk_files(scenario),
                            'Unexpected result for scenario %s' % scenario)

        # construct every possible invalid combination of results
        vals = (None, 'a_file')
        for ts_file, data_file, meta_file in itertools.product(
                vals, repeat=3):
            scenario = {
                'ts_file': ts_file,
                'data_file': data_file,
                'meta_file': meta_file}
            if scenario in ok_scenarios:
                continue
            self.assertFalse(mgr._verify_ondisk_files(scenario),
                             'Unexpected result for scenario %s' % scenario)

    def test_parse_on_disk_filename(self):
        # filenames built from a timestamp (with and without an offset) and
        # each known extension must parse back to the same timestamp and
        # extension
        mgr = self.df_router[POLICIES.default]
        for ts in (Timestamp('1234567890.00001'),
                   Timestamp('1234567890.00001', offset=17)):
            for ext in ('.meta', '.data', '.ts'):
                fname = '%s%s' % (ts.internal, ext)
                info = mgr.parse_on_disk_filename(fname)
                self.assertEqual(ts, info['timestamp'])
                self.assertEqual(ext, info['ext'])

    def test_parse_on_disk_filename_errors(self):
        # a filename with no parseable timestamp raises DiskFileError with
        # a message naming the file
        mgr = self.df_router[POLICIES.default]
        with self.assertRaises(DiskFileError) as cm:
            mgr.parse_on_disk_filename('junk')
        self.assertEqual("Invalid Timestamp value in filename 'junk'",
                         str(cm.exception))

    def test_cleanup_ondisk_files_reclaim_with_data_files(self):
        # Each scenario specifies a list of (filename, extension, [survives])
        # tuples. If extension is set or 'survives' is True, the filename
        # should still be in the dir after cleanup.
        much_older = Timestamp(time() - 2000).internal
        older = Timestamp(time() - 1001).internal
        newer = Timestamp(time() - 900).internal
        scenarios = [
            # .data files are not reclaimed, ever
            [('%s.data' % older, '.data', True)],
            [('%s.data' % newer, '.data', True)],

            # ... and we could have a mixture of fresh and stale .data
            [('%s.data' % newer, '.data', True),
             ('%s.data' % older, False, False)],

            # tombstone reclaimed despite newer data
            [('%s.data' % newer, '.data', True),
             ('%s.data' % older, False, False),
             ('%s.ts' % much_older, '.ts', False)],

            # .meta not reclaimed if there is a .data file
            [('%s.meta' % older, '.meta'),
             ('%s.data' % much_older, '.data')]]

        self._test_cleanup_ondisk_files_files(scenarios, POLICIES.default,
                                              reclaim_age=1000)

    def test_yield_hashes(self):
        # every hash dir holding a tombstone or data file is yielded with
        # the timestamp of its newest such file; the empty suffix dir
        # contributes nothing
        old_ts = '1383180000.12345'
        fresh_ts = Timestamp(time() - 10).internal
        fresher_ts = Timestamp(time() - 1).internal
        suffix_map = {
            'abc': {
                '9373a92d072897b136b3fc06595b4abc': [
                    fresh_ts + '.ts'],
            },
            '456': {
                '9373a92d072897b136b3fc06595b0456': [
                    old_ts + '.data'],
                '9373a92d072897b136b3fc06595b7456': [
                    fresh_ts + '.ts',
                    fresher_ts + '.data'],
            },
            'def': {},
        }
        expected = {
            '9373a92d072897b136b3fc06595b4abc': {'ts_data': fresh_ts},
            '9373a92d072897b136b3fc06595b0456': {'ts_data': old_ts},
            '9373a92d072897b136b3fc06595b7456': {'ts_data': fresher_ts},
        }
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

    def test_yield_hashes_yields_meta_timestamp(self):
        # the newest .meta timestamp is reported as ts_meta alongside a
        # .data file, but not alongside a tombstone
        ts_iter = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(ts_iter)
        ts2 = next(ts_iter)
        ts3 = next(ts_iter)
        suffix_map = {
            'abc': {
                # only tombstone is yield/sync -able
                '9333a92d072897b136b3fc06595b4abc': [
                    ts1.internal + '.ts',
                    ts2.internal + '.meta'],
            },
            '456': {
                # only latest metadata timestamp
                '9444a92d072897b136b3fc06595b0456': [
                    ts1.internal + '.data',
                    ts2.internal + '.meta',
                    ts3.internal + '.meta'],
                # exemplary datadir with .meta
                '9555a92d072897b136b3fc06595b7456': [
                    ts1.internal + '.data',
                    ts2.internal + '.meta'],
            },
        }
        expected = {
            '9333a92d072897b136b3fc06595b4abc':
            {'ts_data': ts1},
            '9444a92d072897b136b3fc06595b0456':
            {'ts_data': ts1, 'ts_meta': ts3},
            '9555a92d072897b136b3fc06595b7456':
            {'ts_data': ts1, 'ts_meta': ts2},
        }
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

    def test_yield_hashes_yields_content_type_timestamp(self):
        # a content-type timestamp encoded in a .meta filename is reported
        # as ts_ctype unless it is not newer than the data file
        hash_ = '9373a92d072897b136b3fc06595b4abc'
        ts_iter = make_timestamp_iter()
        ts0, ts1, ts2, ts3, ts4 = (next(ts_iter) for _ in range(5))
        data_file = ts1.internal + '.data'

        # no content-type delta
        meta_file = ts2.internal + '.meta'
        suffix_map = {'abc': {hash_: [data_file, meta_file]}}
        expected = {hash_: {'ts_data': ts1,
                            'ts_meta': ts2}}
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

        # non-zero content-type delta
        delta = ts3.raw - ts2.raw
        meta_file = '%s-%x.meta' % (ts3.internal, delta)
        suffix_map = {'abc': {hash_: [data_file, meta_file]}}
        expected = {hash_: {'ts_data': ts1,
                            'ts_meta': ts3,
                            'ts_ctype': ts2}}
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

        # zero content-type delta
        meta_file = '%s+0.meta' % ts3.internal
        suffix_map = {'abc': {hash_: [data_file, meta_file]}}
        expected = {hash_: {'ts_data': ts1,
                            'ts_meta': ts3,
                            'ts_ctype': ts3}}
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

        # content-type in second meta file
        delta = ts3.raw - ts2.raw
        meta_file1 = '%s-%x.meta' % (ts3.internal, delta)
        meta_file2 = '%s.meta' % ts4.internal
        suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}}
        expected = {hash_: {'ts_data': ts1,
                            'ts_meta': ts4,
                            'ts_ctype': ts2}}
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

        # obsolete content-type in second meta file, older than data file
        delta = ts3.raw - ts0.raw
        meta_file1 = '%s-%x.meta' % (ts3.internal, delta)
        meta_file2 = '%s.meta' % ts4.internal
        suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}}
        expected = {hash_: {'ts_data': ts1,
                            'ts_meta': ts4}}
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

        # obsolete content-type in second meta file, same time as data file
        delta = ts3.raw - ts1.raw
        meta_file1 = '%s-%x.meta' % (ts3.internal, delta)
        meta_file2 = '%s.meta' % ts4.internal
        suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}}
        expected = {hash_: {'ts_data': ts1,
                            'ts_meta': ts4}}
        self._check_yield_hashes(POLICIES.default, suffix_map, expected)

    def test_yield_hashes_suffix_filter(self):
        # test again with limited suffixes
        old_ts = '1383180000.12345'
        fresh_ts = Timestamp(time() - 10).internal
        fresher_ts = Timestamp(time() - 1).internal
        suffix_map = {
            'abc': {
                '9373a92d072897b136b3fc06595b4abc': [
                    fresh_ts + '.ts'],
            },
            '456': {
                '9373a92d072897b136b3fc06595b0456': [
                    old_ts + '.data'],
                '9373a92d072897b136b3fc06595b7456': [
                    fresh_ts + '.ts',
                    fresher_ts + '.data'],
            },
            'def': {},
        }
        expected = {
            '9373a92d072897b136b3fc06595b0456': {'ts_data': old_ts},
            '9373a92d072897b136b3fc06595b7456': {'ts_data': fresher_ts},
        }
        self._check_yield_hashes(POLICIES.default, suffix_map, expected,
                                 suffixes=['456'])

    def test_yield_hashes_fails_with_bad_ondisk_filesets(self):
        # One hash dir holds only a .meta file; the yield_hashes check is
        # expected to raise AssertionError for this file set.
        ts_iter = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(ts_iter)
        suffix_map = {
            '456': {
                '9373a92d072897b136b3fc06595b0456': [
                    ts1.internal + '.data'],
                '9373a92d072897b136b3fc06595ba456': [
                    ts1.internal + '.meta'],
            },
        }
        expected = {
            '9373a92d072897b136b3fc06595b0456': {'ts_data': ts1},
        }
        # assertRaises is used rather than try / self.fail() / except
        # AssertionError: self.fail() itself raises AssertionError, so such
        # an except clause would swallow it and the test could never fail.
        # NOTE(review): if this test fails, the AssertionError expectation
        # is not being met by the code under test - confirm the intended
        # behaviour for a stray .meta file before adjusting the test.
        with self.assertRaises(AssertionError):
            self._check_yield_hashes(POLICIES.default, suffix_map, expected,
                                     frag_index=2)


# Runs the tests inherited from DiskFileManagerMixin, plus the EC-specific
# tests below, against diskfile.ECDiskFileManager. Policies are patched with
# with_ec_default=True, so the tests here use POLICIES.default as the policy
# under test.
@patch_policies(with_ec_default=True)
class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase):

    # manager class under test, read by the mixin
    mgr_cls = diskfile.ECDiskFileManager

    def test_get_ondisk_files_with_ec_policy(self):
        # Table-driven check of file selection and cleanup in an EC object
        # dir when no frag index is requested (None is passed as the last
        # argument of _test_get_ondisk_files below).
        #
        # Each scenario specifies a list of (filename, extension, [survives])
        # tuples. If extension is set then that filename should be returned by
        # the method under test for that extension type. If the optional
        # 'survives' is True, the filename should still be in the dir after
        # cleanup.
        scenarios = [[('0000000007.00000.ts', '.ts')],

                     [('0000000007.00000.ts', '.ts'),
                      ('0000000006.00000.ts', False)],

                     # highest frag index is chosen by default
                     [('0000000007.00000.durable', '.durable'),
                      ('0000000007.00000#1.data', '.data'),
                      ('0000000007.00000#0.data', False, True)],

                     # data with no durable is ignored
                     [('0000000007.00000#0.data', False, True)],

                     # data newer than tombstone with no durable is ignored
                     [('0000000007.00000#0.data', False, True),
                      ('0000000006.00000.ts', '.ts', True)],

                     # data newer than durable is ignored
                     [('0000000008.00000#1.data', False, True),
                      ('0000000007.00000.durable', '.durable'),
                      ('0000000007.00000#1.data', '.data'),
                      ('0000000007.00000#0.data', False, True)],

                     # data newer than durable is ignored, even if it is the
                     # only data
                     [('0000000008.00000#1.data', False, True),
                      ('0000000007.00000.durable', False, False)],

                     # data older than durable is ignored
                     [('0000000007.00000.durable', '.durable'),
                      ('0000000007.00000#1.data', '.data'),
                      ('0000000006.00000#1.data', False),
                      ('0000000004.00000.ts', False)],

                     # data older than durable is ignored, even if it is the
                     # only data
                     [('0000000007.00000.durable', False, False),
                      ('0000000006.00000#1.data', False),
                      ('0000000004.00000.ts', False)],

                     # newer meta trumps older meta
                     [('0000000009.00000.meta', '.meta'),
                      ('0000000008.00000.meta', False),
                      ('0000000007.00000.durable', '.durable'),
                      ('0000000007.00000#14.data', '.data'),
                      ('0000000004.00000.ts', False)],

                     # older meta is ignored
                     [('0000000007.00000.durable', '.durable'),
                      ('0000000007.00000#14.data', '.data'),
                      ('0000000006.00000.meta', False),
                      ('0000000004.00000.ts', False)],

                     # tombstone trumps meta, data, durable at older timestamp
                     [('0000000006.00000.ts', '.ts'),
                      ('0000000005.00000.meta', False),
                      ('0000000004.00000.durable', False),
                      ('0000000004.00000#0.data', False)],

                     # tombstone trumps meta, data, durable at same timestamp
                     [('0000000006.00000.meta', False),
                      ('0000000006.00000.ts', '.ts'),
                      ('0000000006.00000.durable', False),
                      ('0000000006.00000#0.data', False)],

                     # missing durable invalidates data
                     [('0000000006.00000.meta', False, True),
                      ('0000000006.00000#0.data', False, True)]
                     ]

        # The same scenario table is fed to three shared helpers, which are
        # defined outside this method.
        self._test_get_ondisk_files(scenarios, POLICIES.default, None)
        self._test_cleanup_ondisk_files_files(scenarios, POLICIES.default)
        self._test_yield_hashes_cleanup(scenarios, POLICIES.default)

    def test_get_ondisk_files_with_ec_policy_and_frag_index(self):
        # Table-driven check of file selection in an EC object dir when a
        # specific frag index is requested (frag_index=1 below).
        #
        # Each scenario specifies a list of (filename, extension, [survives])
        # tuples. If extension is set then that filename should be returned by
        # the method under test for that extension type.
        # NOTE(review): the optional third 'survives' element is presumably
        # only consulted by the cleanup helper, which this test does not
        # call (see the note at the end) - confirm against the helper.
        scenarios = [[('0000000007.00000#2.data', False, True),
                      ('0000000007.00000#1.data', '.data'),
                      ('0000000007.00000#0.data', False, True),
                      ('0000000007.00000.durable', '.durable')],

                     # specific frag newer than durable is ignored
                     [('0000000007.00000#2.data', False, True),
                      ('0000000007.00000#1.data', False, True),
                      ('0000000007.00000#0.data', False, True),
                      ('0000000006.00000.durable', '.durable')],

                     # specific frag older than durable is ignored
                     [('0000000007.00000#2.data', False),
                      ('0000000007.00000#1.data', False),
                      ('0000000007.00000#0.data', False),
                      ('0000000008.00000.durable', '.durable')],

                     # specific frag older than newest durable is ignored
                     # even if it also has a durable
                     [('0000000007.00000#2.data', False),
                      ('0000000007.00000#1.data', False),
                      ('0000000007.00000.durable', False),
                      ('0000000008.00000#0.data', False),
                      ('0000000008.00000.durable', '.durable')],

                     # meta included when frag index is specified
                     [('0000000009.00000.meta', '.meta'),
                      ('0000000007.00000#2.data', False, True),
                      ('0000000007.00000#1.data', '.data'),
                      ('0000000007.00000#0.data', False, True),
                      ('0000000007.00000.durable', '.durable')],

                     # specific frag older than tombstone is ignored
                     [('0000000009.00000.ts', '.ts'),
                      ('0000000007.00000#2.data', False),
                      ('0000000007.00000#1.data', False),
                      ('0000000007.00000#0.data', False),
                      ('0000000007.00000.durable', False)],

                     # no data file returned if specific frag index missing
                     [('0000000007.00000#2.data', False, True),
                      ('0000000007.00000#14.data', False, True),
                      ('0000000007.00000#0.data', False, True),
                      ('0000000007.00000.durable', '.durable')],

                     # meta ignored if specific frag index missing
                     [('0000000008.00000.meta', False, True),
                      ('0000000007.00000#14.data', False, True),
                      ('0000000007.00000#0.data', False, True),
                      ('0000000007.00000.durable', '.durable')],

                     # meta ignored if no data files
                     # Note: this is anomalous, because we are specifying a
                     # frag_index, get_ondisk_files will tolerate .meta with
                     # no .data
                     [('0000000088.00000.meta', False, True),
                      ('0000000077.00000.durable', '.durable')]
                     ]

        self._test_get_ondisk_files(scenarios, POLICIES.default, frag_index=1)
        # note: not calling self._test_cleanup_ondisk_files_files(scenarios, 0)
        # here due to the anomalous scenario as commented above

    def test_cleanup_ondisk_files_reclaim_with_data_files(self):
        # Table-driven check of which files in an EC object dir are removed
        # by cleanup when a reclaim_age of 1000 seconds is in force.
        #
        # Each scenario specifies a list of (filename, extension, [survives])
        # tuples. If extension is set or 'survives' is True, the filename
        # should still be in the dir after cleanup.
        # NOTE(review): the reclaimed-tombstone entries below set an
        # extension together with survives=False, which does not match the
        # sentence above - confirm how the helper treats that combination.
        #
        # Timestamps relative to reclaim_age=1000: 'much_older' and 'older'
        # are both past the reclaim age ('older' only just), 'newer' is
        # still within it.
        much_older = Timestamp(time() - 2000).internal
        older = Timestamp(time() - 1001).internal
        newer = Timestamp(time() - 900).internal
        scenarios = [
            # isolated .durable is cleaned up immediately
            [('%s.durable' % newer, False, False)],

            # ...even when other older files are in dir
            [('%s.durable' % older, False, False),
             ('%s.ts' % much_older, False, False)],

            # isolated .data files are cleaned up when stale
            [('%s#2.data' % older, False, False),
             ('%s#4.data' % older, False, False)],

            # ...even when there is an older durable fileset
            [('%s#2.data' % older, False, False),
             ('%s#4.data' % older, False, False),
             ('%s#2.data' % much_older, '.data', True),
             ('%s#4.data' % much_older, False, True),
             ('%s.durable' % much_older, '.durable', True)],

            # ... but preserved if still fresh
            [('%s#2.data' % newer, False, True),
             ('%s#4.data' % newer, False, True)],

            # ... and we could have a mixture of fresh and stale .data
            [('%s#2.data' % newer, False, True),
             ('%s#4.data' % older, False, False)],

            # tombstone reclaimed despite newer non-durable data
            [('%s#2.data' % newer, False, True),
             ('%s#4.data' % older, False, False),
             ('%s.ts' % much_older, '.ts', False)],

            # tombstone reclaimed despite much older durable
            [('%s.ts' % older, '.ts', False),
             ('%s.durable' % much_older, False, False)],

            # .meta not reclaimed if there is durable data
            [('%s.meta' % older, '.meta'),
             ('%s#4.data' % much_older, False, True),
             ('%s.durable' % much_older, '.durable', True)],

            # stale .meta reclaimed along with stale non-durable .data
            [('%s.meta' % older, False, False),
             ('%s#4.data' % much_older, False, False)],

            # stale .meta reclaimed along with stale .durable
            [('%s.meta' % older, False, False),
             ('%s.durable' % much_older, False, False)]]

        self._test_cleanup_ondisk_files_files(scenarios, POLICIES.default,
                                              reclaim_age=1000)

    def test_get_ondisk_files_with_stray_meta(self):
        # get_ondisk_files ignores a stray .meta file: opening a diskfile
        # whose dir holds a .meta but no usable durable data must raise
        # DiskFileNotExist.
        class_under_test = self._get_diskfile(POLICIES.default)

        @contextmanager
        def create_files(df, files):
            # Populate df's data dir with the named files for the duration of
            # the with-block, then remove the dir again.
            os.makedirs(df._datadir)
            try:
                for fname in files:
                    fpath = os.path.join(df._datadir, fname)
                    with open(fpath, 'w') as f:
                        diskfile.write_metadata(f, {'name': df._name,
                                                    'Content-Length': 0})
                yield
            finally:
                # always clean up, even if the with-block (or file creation)
                # raised; otherwise a stale dir would be left behind and the
                # next os.makedirs() call would fail
                rmtree(df._datadir, ignore_errors=True)

        # sanity: a durable fileset can be opened
        files = [
            '0000000006.00000#1.data',
            '0000000006.00000.durable',
        ]
        with create_files(class_under_test, files):
            class_under_test.open()

        scenarios = [['0000000007.00000.meta'],

                     ['0000000007.00000.meta',
                      '0000000006.00000.durable'],

                     ['0000000007.00000.meta',
                      '0000000006.00000#1.data'],

                     ['0000000007.00000.meta',
                      '0000000006.00000.durable',
                      '0000000005.00000#1.data']
                     ]
        for files in scenarios:
            with create_files(class_under_test, files):
                try:
                    class_under_test.open()
                except DiskFileNotExist:
                    # expected; leaving the with-block still cleans up
                    continue
            self.fail('expected DiskFileNotExist opening %s with %r' % (
                class_under_test.__class__.__name__, files))

    def test_verify_ondisk_files(self):
        # _verify_ondisk_files should only return False if get_ondisk_files
        # has produced a bad set of files due to a bug, so it is probed
        # directly here with hand-built result dicts.
        manager = self.df_router[POLICIES.default]
        valid_results = (
            {'ts_file': None, 'data_file': None, 'meta_file': None,
             'durable_frag_set': None},
            {'ts_file': None, 'data_file': 'a_file', 'meta_file': None,
             'durable_frag_set': ['a_file']},
            {'ts_file': None, 'data_file': 'a_file', 'meta_file': 'a_file',
             'durable_frag_set': ['a_file']},
            {'ts_file': 'a_file', 'data_file': None, 'meta_file': None,
             'durable_frag_set': None},
        )

        for results in valid_results:
            verdict = manager._verify_ondisk_files(results)
            self.assertTrue(verdict,
                            'Unexpected result for scenario %s' % results)

        # every other combination of present/absent files must be rejected
        options = (None, 'a_file')
        for ts_file, data_file, meta_file, durable_frag in \
                itertools.product(options, repeat=4):
            results = {
                'ts_file': ts_file,
                'data_file': data_file,
                'meta_file': meta_file,
                'durable_frag_set': [durable_frag] if durable_frag else None}
            if results not in valid_results:
                verdict = manager._verify_ondisk_files(results)
                self.assertFalse(
                    verdict, 'Unexpected result for scenario %s' % results)

    def test_parse_on_disk_filename(self):
        # Parsing a well-formed filename yields its timestamp, extension and
        # frag index, and make_on_disk_filename rebuilds the same name from
        # the parsed info.
        manager = self.df_router[POLICIES.default]

        def check(timestamp, filename, ext, frag_index):
            parsed = manager.parse_on_disk_filename(filename)
            self.assertEqual(timestamp, parsed['timestamp'])
            self.assertEqual(ext, parsed['ext'])
            if frag_index is None:
                self.assertIsNone(parsed['frag_index'])
            else:
                self.assertEqual(frag_index, parsed['frag_index'])
            self.assertEqual(manager.make_on_disk_filename(**parsed),
                             filename)

        timestamps = (Timestamp('1234567890.00001'),
                      Timestamp('1234567890.00001', offset=17))
        for timestamp in timestamps:
            # .data files carry a frag index...
            for frag_index in (0, 2, 14):
                name = '%s#%s.data' % (timestamp.internal, frag_index)
                check(timestamp, name, '.data', frag_index)

            # ...other file types do not
            for ext in ('.meta', '.durable', '.ts'):
                name = '%s%s' % (timestamp.internal, ext)
                check(timestamp, name, ext, None)

    def test_parse_on_disk_filename_errors(self):
        # Malformed filenames are rejected with DiskFileError and a message
        # that hints at what was wrong.
        manager = self.df_router[POLICIES.default]

        # unusable frag index -> word expected (case-insensitively) in the
        # error message
        bad_frag_indexes = {
            '': 'bad',
            'foo': 'bad',
            '1.314': 'bad',
            1.314: 'bad',
            -2: 'negative',
            '-2': 'negative',
            None: 'bad',
            'None': 'bad',
        }

        timestamps = (Timestamp('1234567890.00001'),
                      Timestamp('1234567890.00001', offset=17))
        for timestamp in timestamps:
            # .data file without any frag index
            with self.assertRaises(DiskFileError) as cm:
                manager.parse_on_disk_filename(
                    '%s.data' % timestamp.internal)
            error_text = str(cm.exception)
            self.assertTrue(error_text.startswith("Bad fragment index"))

            for frag_index, keyword in bad_frag_indexes.items():
                filename = '%s#%s.data' % (timestamp.internal, frag_index)
                with self.assertRaises(DiskFileError) as cm:
                    manager.parse_on_disk_filename(filename)
                self.assertIn(keyword, str(cm.exception).lower())

        # no timestamp at all
        with self.assertRaises(DiskFileError) as cm:
            manager.parse_on_disk_filename('junk')
        self.assertEqual("Invalid Timestamp value in filename 'junk'",
                         str(cm.exception))

    def test_make_on_disk_filename(self):
        # make_on_disk_filename builds '<ts>#<frag>.data' for data files and
        # '<ts><ext>' for everything else, and is the inverse of
        # parse_on_disk_filename.
        manager = self.df_router[POLICIES.default]

        def check_round_trip(filename, timestamp, ext, frag_index):
            info = manager.parse_on_disk_filename(filename)
            self.assertEqual(info, {
                'timestamp': timestamp,
                'frag_index': frag_index,
                'ext': ext,
                'ctype_timestamp': None
            })
            # these functions are inverse
            self.assertEqual(manager.make_on_disk_filename(**info), filename)

        timestamps = (Timestamp('1234567890.00001'),
                      Timestamp('1234567890.00001', offset=17))
        for timestamp in timestamps:
            for frag in (0, '0', 2, '2', 14, '14'):
                data_name = '%s#%s.data' % (timestamp.internal, frag)
                built = manager.make_on_disk_filename(
                    timestamp, '.data', frag_index=frag)
                self.assertEqual(data_name, built)
                check_round_trip(data_name, timestamp, '.data', int(frag))

                for ext in ('.meta', '.durable', '.ts'):
                    plain_name = '%s%s' % (timestamp.internal, ext)
                    # frag index should not be required
                    built = manager.make_on_disk_filename(timestamp, ext)
                    self.assertEqual(plain_name, built)
                    # frag index should be ignored
                    built = manager.make_on_disk_filename(
                        timestamp, ext, frag_index=frag)
                    self.assertEqual(plain_name, built)
                    check_round_trip(plain_name, timestamp, ext, None)

            # with no extension the result compares equal to the timestamp
            bare = manager.make_on_disk_filename(timestamp)
            self.assertEqual(timestamp, bare)

    def test_make_on_disk_filename_with_bad_frag_index(self):
        # A .data filename cannot be built without a valid frag index; for
        # every other extension the frag index is not used at all.
        manager = self.df_router[POLICIES.default]
        timestamp = Timestamp('1234567890.00001')

        # .data requires a frag_index kwarg
        try:
            manager.make_on_disk_filename(timestamp, '.data')
        except DiskFileError:
            pass
        else:
            self.fail('Expected DiskFileError for missing frag_index')

        for bad_frag in (None, 'foo', '1.314', 1.314, -2, '-2'):
            try:
                manager.make_on_disk_filename(
                    timestamp, '.data', frag_index=bad_frag)
            except DiskFileError:
                pass
            else:
                self.fail(
                    'Expected DiskFileError for frag_index %s' % bad_frag)

            for ext in ('.meta', '.durable', '.ts'):
                # bad frag index should be ignored
                built = manager.make_on_disk_filename(
                    timestamp, ext, frag_index=bad_frag)
                self.assertEqual('%s%s' % (timestamp.internal, ext), built)

    def test_make_on_disk_filename_for_meta_with_content_type(self):
        # verify .meta filename encodes content-type timestamp as a signed
        # hex offset from the meta timestamp
        mgr = self.df_router[POLICIES.default]
        time_ = 1234567890.00001
        # Deltas are whole numbers of 1e-5 second ticks. Keeping them as
        # ints lets '%x' format them directly: formatting a float with '%x'
        # relies on implicit truncation of an inexact product and raises
        # TypeError on Python 3.
        for delta in (0, 1, 111111):
            t_meta = Timestamp(time_)
            t_type = Timestamp(time_ - delta / 100000.)
            sign = '-' if delta else '+'
            expected = '%s%s%x.meta' % (t_meta.short, sign, delta)
            actual = mgr.make_on_disk_filename(
                t_meta, '.meta', ctype_timestamp=t_type)
            self.assertEqual(expected, actual)
            parsed = mgr.parse_on_disk_filename(actual)
            self.assertEqual(parsed, {
                'timestamp': t_meta,
                'frag_index': None,
                'ext': '.meta',
                'ctype_timestamp': t_type
            })
            # these functions are inverse
            self.assertEqual(
                mgr.make_on_disk_filename(**parsed),
                expected)

    def test_yield_hashes(self):
        # With frag_index=2 all three populated hash dirs are expected: the
        # tombstone-only one and the two holding durable #2 data.
        ts_old = '1383180000.12345'
        ts_fresh = Timestamp(time() - 10).internal
        ts_fresher = Timestamp(time() - 1).internal

        hash_abc = '9373a92d072897b136b3fc06595b4abc'
        hash_456_old = '9373a92d072897b136b3fc06595b0456'
        hash_456_new = '9373a92d072897b136b3fc06595b7456'

        # suffix -> object hash -> files present in that hash dir
        on_disk = {
            'abc': {hash_abc: [ts_fresh + '.ts']},
            '456': {
                hash_456_old: [ts_old + '#2.data',
                               ts_old + '.durable'],
                hash_456_new: [ts_fresh + '.ts',
                               ts_fresher + '#2.data',
                               ts_fresher + '.durable'],
            },
            'def': {},
        }
        wanted = {
            hash_abc: {'ts_data': ts_fresh},
            hash_456_old: {'ts_data': ts_old},
            hash_456_new: {'ts_data': ts_fresher},
        }
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 frag_index=2)

    def test_yield_hashes_yields_meta_timestamp(self):
        # The newest .meta timestamp is reported as 'ts_meta' alongside
        # durable data, but not alongside a tombstone.
        stamps = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(stamps)
        ts2 = next(stamps)
        ts3 = next(stamps)

        tombstone_hash = '9373a92d072897b136b3fc06595b4abc'
        two_meta_hash = '9373a92d072897b136b3fc06595b0456'
        one_meta_hash = '9373a92d072897b136b3fc06595b7456'

        on_disk = {
            'abc': {
                tombstone_hash: [ts1.internal + '.ts',
                                 ts2.internal + '.meta'],
            },
            '456': {
                two_meta_hash: [ts1.internal + '#2.data',
                                ts1.internal + '.durable',
                                ts2.internal + '.meta',
                                ts3.internal + '.meta'],
                one_meta_hash: [ts1.internal + '#2.data',
                                ts1.internal + '.durable',
                                ts2.internal + '.meta'],
            },
        }
        wanted = {
            tombstone_hash: {'ts_data': ts1},
            two_meta_hash: {'ts_data': ts1, 'ts_meta': ts3},
            one_meta_hash: {'ts_data': ts1, 'ts_meta': ts2},
        }
        self._check_yield_hashes(POLICIES.default, on_disk, wanted)

        # but meta timestamp is *not* returned if specified frag index
        # is not found
        wanted = {tombstone_hash: {'ts_data': ts1}}
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 frag_index=3)

    def test_yield_hashes_suffix_filter(self):
        # Restrict the walk to suffix '456' (with frag_index=2): the
        # expected result holds only the two hashes under that suffix.
        ts_old = '1383180000.12345'
        ts_fresh = Timestamp(time() - 10).internal
        ts_fresher = Timestamp(time() - 1).internal

        hash_abc = '9373a92d072897b136b3fc06595b4abc'
        hash_456_old = '9373a92d072897b136b3fc06595b0456'
        hash_456_new = '9373a92d072897b136b3fc06595b7456'

        # suffix -> object hash -> files present in that hash dir
        on_disk = {
            'abc': {hash_abc: [ts_fresh + '.ts']},
            '456': {
                hash_456_old: [ts_old + '#2.data',
                               ts_old + '.durable'],
                hash_456_new: [ts_fresh + '.ts',
                               ts_fresher + '#2.data',
                               ts_fresher + '.durable'],
            },
            'def': {},
        }
        wanted = {
            hash_456_old: {'ts_data': ts_old},
            hash_456_new: {'ts_data': ts_fresher},
        }
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 suffixes=['456'], frag_index=2)

    def test_yield_hashes_skips_missing_durable(self):
        # A hash dir whose .data has no matching .durable is not yielded
        # until the .durable appears.
        stamps = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(stamps)

        durable_hash = '9373a92d072897b136b3fc06595b0456'
        pending_hash = '9373a92d072897b136b3fc06595b7456'

        on_disk = {
            '456': {
                durable_hash: [ts1.internal + '#2.data',
                               ts1.internal + '.durable'],
                pending_hash: [ts1.internal + '#2.data'],
            },
        }
        wanted = {durable_hash: {'ts_data': ts1}}
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 frag_index=2)

        # if we add a durable it shows up
        on_disk['456'][pending_hash].append(ts1.internal + '.durable')
        wanted = {
            durable_hash: {'ts_data': ts1},
            pending_hash: {'ts_data': ts1},
        }
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 frag_index=2)

    def test_yield_hashes_skips_data_without_durable(self):
        # Newer .data files without a .durable are passed over in favour of
        # the older durable data, with or without a frag index filter.
        stamps = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(stamps)
        ts2 = next(stamps)
        ts3 = next(stamps)

        object_hash = '9373a92d072897b136b3fc06595b0456'
        on_disk = {
            '456': {
                object_hash: [ts1.internal + '#2.data',
                              ts1.internal + '.durable',
                              ts2.internal + '#2.data',
                              ts3.internal + '#2.data'],
            },
        }
        wanted = {object_hash: {'ts_data': ts1}}
        for frag_index in (None, 2):
            self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                     frag_index=frag_index)

        # if we add a durable then newer data shows up
        on_disk['456'][object_hash].append(ts2.internal + '.durable')
        wanted = {object_hash: {'ts_data': ts2}}
        for frag_index in (None, 2):
            self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                     frag_index=frag_index)

    def test_yield_hashes_ignores_bad_ondisk_filesets(self):
        # this differs from DiskFileManager.yield_hashes which will fail
        # when encountering a bad on-disk file set
        stamps = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(stamps)
        ts2 = next(stamps)

        frag2_data = ts1.internal + '#2.data'
        durable = ts1.internal + '.durable'
        older_meta = ts1.internal + '.meta'
        newer_meta = ts2.internal + '.meta'

        good_hash = '9333a92d072897b136b3fc06595b0456'
        good_with_meta_hash = '9999a92d072897b136b3fc06595bb456'

        on_disk = {
            '456': {
                # this one is fine
                good_hash: [frag2_data, durable],
                # missing frag index
                '9444a92d072897b136b3fc06595b7456': [
                    ts1.internal + '.data'],
                # junk
                '9555a92d072897b136b3fc06595b8456': ['junk_file'],
                # missing .durable
                '9666a92d072897b136b3fc06595b9456': [
                    frag2_data, newer_meta],
                # .meta files w/o .data files can't be opened, and are ignored
                '9777a92d072897b136b3fc06595ba456': [older_meta],
                # multiple meta files with no data
                '9888a92d072897b136b3fc06595bb456': [
                    older_meta, newer_meta],
                # this is good with meta
                good_with_meta_hash: [frag2_data, durable, newer_meta],
                # this one is wrong frag index
                '9aaaa92d072897b136b3fc06595b0456': [
                    ts1.internal + '#7.data', durable],
            },
        }
        # only the two well-formed filesets for frag index 2 are expected
        wanted = {
            good_hash: {'ts_data': ts1},
            good_with_meta_hash: {'ts_data': ts1, 'ts_meta': ts2},
        }
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 frag_index=2)

    def test_yield_hashes_filters_frag_index(self):
        # With frag_index=2, only hash dirs holding durable #2 data are
        # expected; the dir that only has frag #3 is left out.
        stamps = (Timestamp(t) for t in itertools.count(int(time())))
        ts1 = next(stamps)
        ts2 = next(stamps)
        ts3 = next(stamps)

        two_frags_hash = '1111111111111111111111111111127e'
        two_durables_hash = '2222222222222222222222222222227e'
        other_frag_hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaad41'
        late_durable_hash = '3333333333333333333333333333300b'

        on_disk = {
            '27e': {
                two_frags_hash: [ts1.internal + '#2.data',
                                 ts1.internal + '#3.data',
                                 ts1.internal + '.durable'],
                two_durables_hash: [ts1.internal + '#2.data',
                                    ts1.internal + '.durable',
                                    ts2.internal + '#2.data',
                                    ts2.internal + '.durable'],
            },
            'd41': {
                other_frag_hash: [ts1.internal + '#3.data',
                                  ts1.internal + '.durable'],
            },
            '00b': {
                late_durable_hash: [ts1.internal + '#2.data',
                                    ts2.internal + '#2.data',
                                    ts3.internal + '#2.data',
                                    ts3.internal + '.durable'],
            },
        }
        wanted = {
            two_frags_hash: {'ts_data': ts1},
            two_durables_hash: {'ts_data': ts2},
            late_durable_hash: {'ts_data': ts3},
        }
        self._check_yield_hashes(POLICIES.default, on_disk, wanted,
                                 frag_index=2)

    def test_get_diskfile_from_hash_frag_index_filter(self):
        # Write two fragment archives (frag indexes 4 and 7) for the same
        # object and timestamp, then check that get_diskfile_from_hash
        # returns the one matching the requested frag_index.
        df = self._get_diskfile(POLICIES.default)
        hash_ = os.path.basename(df._datadir)
        self.assertRaises(DiskFileNotExist,
                          self.df_mgr.get_diskfile_from_hash,
                          self.existing_device1, '0', hash_,
                          POLICIES.default)  # sanity
        timestamp = Timestamp(time())
        frag_indexes = (4, 7)
        data = 'test_data'
        for frag_index in frag_indexes:
            with df.create() as writer:
                writer.write(data)
                metadata = {
                    'ETag': md5(data).hexdigest(),
                    'X-Timestamp': timestamp.internal,
                    'Content-Length': len(data),
                    'X-Object-Sysmeta-Ec-Frag-Index': str(frag_index),
                }
                writer.put(metadata)
                writer.commit(timestamp)

        # each frag index must resolve to its own fragment archive
        for frag_index in frag_indexes:
            found = self.df_mgr.get_diskfile_from_hash(
                self.existing_device1, '0', hash_, POLICIES.default,
                frag_index=frag_index)
            self.assertEqual(found._frag_index, frag_index)
            self.assertEqual(
                found.read_metadata()['X-Object-Sysmeta-Ec-Frag-Index'],
                str(frag_index))


class DiskFileMixin(BaseDiskFileTestMixin):

    # set mgr_cls on subclasses
    mgr_cls = None

    def setUp(self):
        """Set up for testing swift.obj.diskfile"""
        self.tmpdir = mkdtemp()
        self.testdir = os.path.join(
            self.tmpdir, 'tmp_test_obj_server_DiskFile')
        self.existing_device = 'sda1'
        # every policy gets its tmp dir pre-created on the fake device
        device_root = os.path.join(self.testdir, self.existing_device)
        for policy in POLICIES:
            mkdirs(os.path.join(device_root, diskfile.get_tmp_dir(policy)))

        # swap tpool.execute for a direct call; tearDown restores it
        def _call_inline(f, *args, **kwargs):
            return f(*args, **kwargs)

        self._orig_tpool_exc = tpool.execute
        tpool.execute = _call_inline

        self.conf = {
            'devices': self.testdir,
            'mount_check': 'false',
            'keep_cache_size': 2 * 1024,
            'mb_per_sync': 1,
        }
        self.logger = debug_logger('test-' + self.__class__.__name__)
        self.df_mgr = self.mgr_cls(self.conf, self.logger)
        self.df_router = diskfile.DiskFileRouter(self.conf, self.logger)
        # endless supply of Timestamps, one second apart, starting now
        start = int(time())
        self._ts_iter = (Timestamp(t) for t in itertools.count(start))

    def ts(self):
        """
        Hand out the next value from this test's timestamp iterator.
        """
        stamp = next(self._ts_iter)
        return stamp

    def tearDown(self):
        """Tear down for testing swift.obj.diskfile"""
        # remove the scratch tree (errors ignored), then undo the
        # tpool.execute replacement made in setUp
        rmtree(self.tmpdir, ignore_errors=True)
        tpool.execute = self._orig_tpool_exc

    def _create_ondisk_file(self, df, data, timestamp, metadata=None,
                            ctype_timestamp=None,
                            ext='.data'):
        """
        Write ``data`` and pickled metadata xattrs directly into the
        diskfile's hash dir, without using the diskfile writer.

        Missing X-Timestamp, ETag, name and Content-Length entries are
        filled in; a non-empty ``metadata`` dict passed by the caller is
        updated in place.
        """
        mkdirs(df._datadir)
        timestamp = Timestamp(time() if timestamp is None else timestamp)
        if not metadata:
            metadata = {}
        metadata.setdefault('X-Timestamp', timestamp.internal)
        if 'ETag' not in metadata:
            metadata['ETag'] = md5(data).hexdigest()
        metadata.setdefault('name', '/a/c/o')
        if 'Content-Length' not in metadata:
            metadata['Content-Length'] = str(len(data))

        # EC data files carry the fragment index in their name ...
        if ext == '.data' and df.policy.policy_type == EC_POLICY:
            basename = '%s#%s' % (timestamp.internal, df._frag_index)
        else:
            basename = timestamp.internal
        # ... but a content-type timestamp replaces the name altogether
        if ctype_timestamp:
            ctype_ts = Timestamp(ctype_timestamp)
            metadata['Content-Type-Timestamp'] = ctype_ts.internal
            basename = encode_timestamps(timestamp, ctype_ts, explicit=True)

        target = os.path.join(df._datadir, basename + ext)
        with open(target, 'wb') as fp:
            fp.write(data)
            pickled = pickle.dumps(metadata, diskfile.PICKLE_PROTOCOL)
            xattr.setxattr(fp.fileno(), diskfile.METADATA_KEY, pickled)

    def _simple_get_diskfile(self, partition='0', account='a', container='c',
                             obj='o', policy=None, frag_index=None):
        """
        Fetch a diskfile for the given object on the existing device.

        Falls back to the default policy, and to fragment index 2 when an
        EC policy is used without an explicit ``frag_index``.
        """
        if not policy:
            policy = POLICIES.default
        manager = self.df_router[policy]
        is_ec = policy.policy_type == EC_POLICY
        if is_ec and frag_index is None:
            frag_index = 2
        return manager.get_diskfile(
            self.existing_device, partition, account, container, obj,
            policy=policy, frag_index=frag_index)

    def _create_test_file(self, data, timestamp=None, metadata=None,
                          account='a', container='c', obj='o'):
        """
        Write ``data`` through a diskfile writer and return the diskfile
        after opening it.

        ``metadata`` entries override the generated ETag, X-Timestamp and
        Content-Length; a caller-supplied dict gains a default 'name' key.
        """
        if metadata is None:
            metadata = {}
        object_path = '/%s/%s/%s' % (account, container, obj)
        metadata.setdefault('name', object_path)
        df = self._simple_get_diskfile(account=account, container=container,
                                       obj=obj)
        timestamp = Timestamp(time() if timestamp is None else timestamp)
        with df.create() as writer:
            object_metadata = {
                'ETag': md5(data).hexdigest(),
                'X-Timestamp': timestamp.internal,
                'Content-Length': len(data),
            }
            # caller-provided values win over the generated ones
            object_metadata.update(metadata)
            writer.write(data)
            writer.put(object_metadata)
            writer.commit(timestamp)
        df.open()
        return df

    def test_get_dev_path(self):
        # Exercise get_dev_path for each combination of the manager's
        # mount_check setting and the per-call mount_check argument.
        self.df_mgr.devices = '/srv'
        device = 'sda1'
        dev_path = os.path.join(self.df_mgr.devices, device)
        check_mount = 'swift.obj.diskfile.check_mount'
        check_dir = 'swift.obj.diskfile.check_dir'

        def lookup(mount_check, patched, check_result):
            # call get_dev_path with ``patched`` forced to ``check_result``
            with mock.patch(patched,
                            mock.MagicMock(return_value=check_result)):
                return self.df_mgr.get_dev_path(device, mount_check)

        # mount_check=None with the manager configured to check mounts
        self.df_mgr.mount_check = True
        self.assertIsNone(lookup(None, check_mount, False))
        self.assertEqual(lookup(None, check_mount, True), dev_path)

        # mount_check=None with the manager configured not to check mounts
        self.df_mgr.mount_check = False
        self.assertIsNone(lookup(None, check_dir, False))
        self.assertEqual(lookup(None, check_dir, True), dev_path)

        # explicit mount_check=True while the manager setting is False
        self.assertIsNone(lookup(True, check_mount, False))
        self.assertEqual(lookup(True, check_mount, True), dev_path)

        # explicit mount_check=False: nothing patched, path still returned
        self.assertEqual(self.df_mgr.get_dev_path(device, False),
                         dev_path)

    def test_open_not_exist(self):
        # nothing has been written, so opening must report "not exist"
        unwritten = self._simple_get_diskfile()
        with self.assertRaises(DiskFileNotExist):
            unwritten.open()

    def test_open_expired(self):
        # X-Delete-At of '0' is long past; the helper's open() must raise
        expired_metadata = {'X-Delete-At': '0'}
        with self.assertRaises(DiskFileExpired):
            self._create_test_file('1234567890', metadata=expired_metadata)

    def test_open_not_expired(self):
        # an X-Delete-At of twice the current epoch time is far in the future
        delete_at = str(2 * int(time()))
        try:
            self._create_test_file(
                '1234567890', metadata={'X-Delete-At': delete_at})
        except SwiftException as err:
            self.fail("Unexpected swift exception raised: %r" % err)

    def test_get_metadata(self):
        # get_metadata() on the opened diskfile reports the write timestamp
        written_at = self.ts().internal
        df = self._create_test_file('1234567890', timestamp=written_at)
        self.assertEqual(df.get_metadata()['X-Timestamp'], written_at)

    def test_read_metadata(self):
        # read_metadata() is called on a fresh diskfile that was never
        # opened by this test
        written_at = self.ts().internal
        self._create_test_file('1234567890', timestamp=written_at)
        fresh_df = self._simple_get_diskfile()
        self.assertEqual(fresh_df.read_metadata()['X-Timestamp'], written_at)

    def test_read_metadata_no_xattr(self):
        # an "operation not supported" IOError from getxattr must be
        # reported as DiskFileXattrNotSupported
        def unsupported_getxattr(*args, **kargs):
            if hasattr(errno, 'ENOTSUP'):
                error_num = errno.ENOTSUP
            else:
                error_num = errno.EOPNOTSUPP
            raise IOError(error_num, "Operation not supported")

        with mock.patch('xattr.getxattr', unsupported_getxattr):
            with self.assertRaises(DiskFileXattrNotSupported):
                diskfile.read_metadata('n/a')

    def test_get_metadata_not_opened(self):
        # get_metadata() requires the diskfile to have been opened
        unopened = self._simple_get_diskfile()
        self.assertRaises(DiskFileNotOpen, unopened.get_metadata)

    def test_get_datafile_metadata(self):
        # get_datafile_metadata() keeps reporting what was written with the
        # data, while get_metadata() reflects the later metadata write.
        timestamps = make_timestamp_iter()
        data_ts = next(timestamps)
        df = self._create_test_file(
            '1234567890', timestamp=data_ts.internal,
            metadata={'X-Object-Meta-Test': 'test1',
                      'X-Object-Sysmeta-Test': 'test1'})
        expected = df.get_metadata()

        meta_ts = next(timestamps)
        posted = {'X-Timestamp': meta_ts.internal,
                  'X-Object-Meta-Test': 'changed',
                  'X-Object-Sysmeta-Test': 'ignored'}
        df.write_metadata(posted)
        df.open()
        self.assertEqual(expected, df.get_datafile_metadata())

        # merged view: timestamp and user meta change, sysmeta does not
        expected['X-Timestamp'] = meta_ts.internal
        expected['X-Object-Meta-Test'] = 'changed'
        self.assertEqual(expected, df.get_metadata())

    def test_get_datafile_metadata_not_opened(self):
        # get_datafile_metadata() requires the diskfile to have been opened
        unopened = self._simple_get_diskfile()
        self.assertRaises(DiskFileNotOpen, unopened.get_datafile_metadata)

    def test_get_metafile_metadata(self):
        # get_metafile_metadata() is None until a metadata write happens,
        # then returns exactly what was written.
        timestamps = make_timestamp_iter()
        data_ts = next(timestamps)
        df = self._create_test_file(
            '1234567890', timestamp=data_ts.internal,
            metadata={'X-Object-Meta-Test': 'test1',
                      'X-Object-Sysmeta-Test': 'test1'})
        self.assertIsNone(df.get_metafile_metadata())

        # now create a meta file
        meta_ts = next(timestamps)
        posted = {'X-Timestamp': meta_ts.internal,
                  'X-Object-Meta-Test': 'changed'}
        df.write_metadata(posted)
        df.open()
        self.assertEqual(
            {'X-Timestamp': meta_ts.internal,
             'X-Object-Meta-Test': 'changed'},
            df.get_metafile_metadata())

    def test_get_metafile_metadata_not_opened(self):
        # get_metafile_metadata() requires the diskfile to have been opened
        unopened = self._simple_get_diskfile()
        self.assertRaises(DiskFileNotOpen, unopened.get_metafile_metadata)

    def test_not_opened(self):
        # using an unopened diskfile as a context manager must be refused
        unopened = self._simple_get_diskfile()

        def use_as_context():
            with unopened:
                pass

        self.assertRaises(DiskFileNotOpen, use_as_context)

    def test_disk_file_default_disallowed_metadata(self):
        # build an object with some meta, stamped with the first value
        # handed out by self.ts()
        put_metadata = {'X-Object-Meta-Key1': 'Value1',
                        'X-Object-Transient-Sysmeta-KeyA': 'ValueA',
                        'Content-Type': 'text/garbage'}
        put_ts = self.ts().internal
        df = self._get_open_disk_file(ts=put_ts, extra_metadata=put_metadata)
        with df.open():
            self.assertEqual('1024', df._metadata['Content-Length'])

        # write some new metadata (fast POST, don't send orig meta) using
        # the next value from self.ts()
        post_metadata = {'X-Timestamp': self.ts().internal,
                         'X-Object-Transient-Sysmeta-KeyB': 'ValueB',
                         'X-Object-Meta-Key2': 'Value2'}
        df = self._simple_get_diskfile()
        df.write_metadata(post_metadata)

        df = self._simple_get_diskfile()
        with df.open():
            merged = df._metadata
            # non-fast-post updateable keys are preserved
            self.assertEqual('text/garbage', merged['Content-Type'])
            # original fast-post updateable keys are removed
            for dropped in ('X-Object-Meta-Key1',
                            'X-Object-Transient-Sysmeta-KeyA'):
                self.assertNotIn(dropped, merged)
            # new fast-post updateable keys are added
            self.assertEqual('Value2', merged['X-Object-Meta-Key2'])
            self.assertEqual('ValueB',
                             merged['X-Object-Transient-Sysmeta-KeyB'])

    def test_disk_file_preserves_sysmeta(self):
        # build an object with some meta, stamped with the first value
        # handed out by self.ts()
        put_metadata = {'X-Object-Sysmeta-Key1': 'Value1',
                        'Content-Type': 'text/garbage'}
        put_ts = self.ts().internal
        df = self._get_open_disk_file(ts=put_ts, extra_metadata=put_metadata)
        with df.open():
            self.assertEqual('1024', df._metadata['Content-Length'])

        # write some new metadata (fast POST, don't send orig meta) using
        # the next value from self.ts()
        post_metadata = {'X-Timestamp': self.ts().internal,
                         'X-Object-Sysmeta-Key1': 'Value2',
                         'X-Object-Meta-Key3': 'Value3'}
        df = self._simple_get_diskfile()
        df.write_metadata(post_metadata)

        df = self._simple_get_diskfile()
        with df.open():
            merged = df._metadata
            # non-fast-post updateable keys are preserved
            self.assertEqual('text/garbage', merged['Content-Type'])
            # original sysmeta keys are preserved
            self.assertEqual('Value1', merged['X-Object-Sysmeta-Key1'])

    def test_disk_file_reader_iter(self):
        # iterating the reader yields the whole body and never calls the
        # quarantine hook
        body = '1234567890'
        hook_calls = []
        df = self._create_test_file(body)
        reader = df.reader(_quarantine_hook=hook_calls.append)
        self.assertEqual(''.join(reader), body)
        self.assertEqual(hook_calls, [])

    def test_disk_file_reader_iter_w_quarantine(self):
        # an exception raised by the quarantine hook propagates out of the
        # reader's iteration
        df = self._create_test_file('1234567890')

        def hook_that_raises(msg):
            raise DiskFileQuarantined(msg)

        reader = df.reader(_quarantine_hook=hook_that_raises)
        # make the reader expect one more byte than is on disk
        reader._obj_size += 1
        with self.assertRaises(DiskFileQuarantined):
            ''.join(reader)

    def test_disk_file_app_iter_corners(self):
        # app_iter_range with an open-ended stop, from offset 0 and from 5
        hook_calls = []
        df = self._create_test_file('1234567890')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        whole = ''.join(reader.app_iter_range(0, None))
        self.assertEqual(whole, '1234567890')
        self.assertEqual(hook_calls, [])

        df = self._simple_get_diskfile()
        with df.open():
            tail = ''.join(df.reader().app_iter_range(5, None))
            self.assertEqual(tail, '67890')

    def test_disk_file_app_iter_range_w_none(self):
        # start and stop both None must yield the complete body
        hook_calls = []
        df = self._create_test_file('1234567890')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        everything = ''.join(reader.app_iter_range(None, None))
        self.assertEqual(everything, '1234567890')
        self.assertEqual(hook_calls, [])

    def test_disk_file_app_iter_partial_closes(self):
        # after a partial range has been consumed the reader must have
        # released its file pointer, without calling the quarantine hook
        df = self._create_test_file('1234567890')
        quarantine_msgs = []
        reader = df.reader(_quarantine_hook=quarantine_msgs.append)
        it = reader.app_iter_range(0, 5)
        self.assertEqual(''.join(it), '12345')
        self.assertEqual(quarantine_msgs, [])
        # assertIsNone reports the leftover value if this ever fails
        self.assertIsNone(reader._fp)

    def test_disk_file_app_iter_ranges(self):
        # three adjacent ranges that together cover the whole 30 byte body
        hook_calls = []
        df = self._create_test_file('012345678911234567892123456789')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        ranges = [(0, 10), (10, 20), (20, 30)]
        multipart = ''.join(reader.app_iter_ranges(
            ranges, 'plain/text', '\r\n--someheader\r\n', 30))
        for segment in ('0123456789', '1123456789', '2123456789'):
            self.assertIn(segment, multipart)
        self.assertEqual(hook_calls, [])

    def test_disk_file_app_iter_ranges_w_quarantine(self):
        # reading the full body through a single range while the reader
        # expects one extra byte must trigger the quarantine hook
        hook_calls = []
        df = self._create_test_file('012345678911234567892123456789')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        reader._obj_size += 1
        multipart = ''.join(reader.app_iter_ranges(
            [(0, 30)], 'plain/text', '\r\n--someheader\r\n', 30))
        for segment in ('0123456789', '1123456789', '2123456789'):
            self.assertIn(segment, multipart)
        self.assertEqual(hook_calls,
                         ["Bytes read: 30, does not match metadata: 31"])

    def test_disk_file_app_iter_ranges_w_no_etag_quarantine(self):
        # a range covering only the first 10 bytes must not call the
        # quarantine hook
        hook_calls = []
        df = self._create_test_file('012345678911234567892123456789')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        multipart = ''.join(reader.app_iter_ranges(
            [(0, 10)], 'plain/text', '\r\n--someheader\r\n', 30))
        self.assertIn('0123456789', multipart)
        self.assertEqual(hook_calls, [])

    def test_disk_file_app_iter_ranges_edges(self):
        # ranges requested out of order: (3, 10) first, then (0, 2)
        hook_calls = []
        df = self._create_test_file('012345678911234567892123456789')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        multipart = ''.join(reader.app_iter_ranges(
            [(3, 10), (0, 2)], 'application/whatever',
            '\r\n--someheader\r\n', 30))
        for segment in ('3456789', '01'):
            self.assertIn(segment, multipart)
        self.assertEqual(hook_calls, [])

    def test_disk_file_large_app_iter_ranges(self):
        # This test case makes sure that app_iter_ranges also works for a
        # large object, with a range longer than 64 KiB.
        long_str = '01234567890' * 65536
        target_strs = ['3456789', long_str[0:65590]]
        df = self._create_test_file(long_str)
        quarantine_msgs = []
        reader = df.reader(_quarantine_hook=quarantine_msgs.append)
        it = reader.app_iter_ranges([(3, 10), (0, 65590)], 'plain/text',
                                    '5e816ff8b8b8e9a5d355497e5d9e0301', 655360)

        # The produced string is missing the MIME headers; they are added
        # here to make it a real MIME message. The body of the message is
        # produced by the app_iter_ranges method of the DiskFile reader.
        header = ''.join(['Content-Type: multipart/byteranges;',
                          'boundary=',
                          '5e816ff8b8b8e9a5d355497e5d9e0301\r\n'])

        value = header + ''.join(it)
        self.assertEqual(quarantine_msgs, [])

        # Build a real list before slicing: map() is only subscriptable
        # where it returns a list. walk() yields the container message
        # first, so entries 1 and 2 are the two range payloads.
        payloads = [part.get_payload(decode=True)
                    for part in email.message_from_string(value).walk()]
        self.assertEqual(payloads[1:3], target_strs)

    def test_disk_file_app_iter_ranges_empty(self):
        # app_iter_ranges must produce an empty string when the ranges
        # argument is an empty list or None.
        hook_calls = []
        df = self._create_test_file('012345678911234567892123456789')
        reader = df.reader(_quarantine_hook=hook_calls.append)
        from_empty_list = reader.app_iter_ranges(
            [], 'application/whatever', '\r\n--someheader\r\n', 100)
        self.assertEqual(''.join(from_empty_list), '')

        df = self._simple_get_diskfile()
        with df.open():
            from_none = df.reader().app_iter_ranges(
                None, 'app/something', '\r\n--someheader\r\n', 150)
            self.assertEqual(''.join(from_none), '')
            self.assertEqual(hook_calls, [])

    def test_disk_file_mkstemp_creates_dir(self):
        # df.create() must bring back a policy tmp dir that was removed
        device_root = os.path.join(self.testdir, self.existing_device)
        for policy in POLICIES:
            policy_tmp = os.path.join(device_root,
                                      diskfile.get_tmp_dir(policy))
            os.rmdir(policy_tmp)
            df = self._simple_get_diskfile(policy=policy)
            with df.create():
                self.assertTrue(os.path.exists(policy_tmp))

    def _get_open_disk_file(self, invalid_type=None, obj_name='o', fsize=1024,
                            csize=8, mark_deleted=False, prealloc=False,
                            ts=None, mount_check=False, extra_metadata=None,
                            policy=None, frag_index=None, data=None,
                            commit=True):
        """
        Write an object, optionally damage it, and return the reopened
        DiskFile.

        As a side effect self.conf gains 'disk_chunk_size' and
        'mount_check' entries and self.df_mgr / self.df_router are rebuilt
        from it.

        :param invalid_type: name of the damage to apply, or None. 'ETag',
            'Content-Length', 'Bad-Content-Length',
            'Missing-Content-Length' and 'Bad-X-Delete-At' rewrite the
            metadata on the writer's fd before commit; 'Corrupt-Xattrs',
            'Truncated-Xattrs', 'Missing-Name' and 'Bad-Name' alter the
            first .data file in reverse-sorted order after the write;
            'Zero-Byte' truncates the data file after the final open().
        :param obj_name: object name passed to _simple_get_diskfile
        :param fsize: length of the default body (a run of '0' characters);
            also stored on the returned diskfile as ``unit_test_len``
        :param csize: value stored as conf['disk_chunk_size']
        :param mark_deleted: if true, df.delete(timestamp) is called after
            the write
        :param prealloc: if true, fsize is passed as ``size`` to df.create()
        :param ts: timestamp for the write; current time when falsy
        :param mount_check: value stored as conf['mount_check']
        :param extra_metadata: dict merged over the generated metadata
        :param policy: storage policy; POLICIES.legacy when falsy
        :param frag_index: fragment index passed to _simple_get_diskfile
        :param data: body to write instead of the default when truthy
        :param commit: if false, writer.commit() is skipped
        :returns: the DiskFile obtained from the rebuilt manager, after
            open() has been called on it
        """
        policy = policy or POLICIES.legacy
        df = self._simple_get_diskfile(obj=obj_name, policy=policy,
                                       frag_index=frag_index)
        data = data or '0' * fsize
        etag = md5()
        if ts:
            timestamp = Timestamp(ts)
        else:
            timestamp = Timestamp(time())
        if prealloc:
            prealloc_size = fsize
        else:
            prealloc_size = None

        with df.create(size=prealloc_size) as writer:
            upload_size = writer.write(data)
            etag.update(data)
            # from here on ``etag`` is the hex digest string, not the hasher
            etag = etag.hexdigest()
            metadata = {
                'ETag': etag,
                'X-Timestamp': timestamp.internal,
                'Content-Length': str(upload_size),
            }
            metadata.update(extra_metadata or {})
            writer.put(metadata)
            # the branches below overwrite the metadata just put, writing
            # directly to the writer's file descriptor
            if invalid_type == 'ETag':
                # digest of a body whose first byte differs from the default
                etag = md5()
                etag.update('1' + '0' * (fsize - 1))
                etag = etag.hexdigest()
                metadata['ETag'] = etag
                diskfile.write_metadata(writer._fd, metadata)
            elif invalid_type == 'Content-Length':
                metadata['Content-Length'] = fsize - 1
                diskfile.write_metadata(writer._fd, metadata)
            elif invalid_type == 'Bad-Content-Length':
                metadata['Content-Length'] = 'zero'
                diskfile.write_metadata(writer._fd, metadata)
            elif invalid_type == 'Missing-Content-Length':
                del metadata['Content-Length']
                diskfile.write_metadata(writer._fd, metadata)
            elif invalid_type == 'Bad-X-Delete-At':
                metadata['X-Delete-At'] = 'bad integer'
                diskfile.write_metadata(writer._fd, metadata)
            if commit:
                writer.commit(timestamp)

        if mark_deleted:
            df.delete(timestamp)

        # .data files in the hash dir, reverse-sorted by name so that
        # data_files[0] is the lexicographically greatest one
        data_files = [os.path.join(df._datadir, fname)
                      for fname in sorted(os.listdir(df._datadir),
                                          reverse=True)
                      if fname.endswith('.data')]
        if invalid_type == 'Corrupt-Xattrs':
            # We have to go below read_metadata/write_metadata to get proper
            # corruption.
            meta_xattr = xattr.getxattr(data_files[0], "user.swift.metadata")
            # replace the first byte with one guaranteed to differ
            wrong_byte = 'X' if meta_xattr[0] != 'X' else 'Y'
            xattr.setxattr(data_files[0], "user.swift.metadata",
                           wrong_byte + meta_xattr[1:])
        elif invalid_type == 'Truncated-Xattrs':
            # drop the last byte of the stored metadata
            meta_xattr = xattr.getxattr(data_files[0], "user.swift.metadata")
            xattr.setxattr(data_files[0], "user.swift.metadata",
                           meta_xattr[:-1])
        elif invalid_type == 'Missing-Name':
            md = diskfile.read_metadata(data_files[0])
            del md['name']
            diskfile.write_metadata(data_files[0], md)
        elif invalid_type == 'Bad-Name':
            md = diskfile.read_metadata(data_files[0])
            md['name'] = md['name'] + 'garbage'
            diskfile.write_metadata(data_files[0], md)

        # rebuild the manager and router so the requested chunk size and
        # mount_check values are in the conf they are constructed from
        self.conf['disk_chunk_size'] = csize
        self.conf['mount_check'] = mount_check
        self.df_mgr = self.mgr_cls(self.conf, self.logger)
        self.df_router = diskfile.DiskFileRouter(self.conf, self.logger)

        # actual on disk frag_index may have been set by metadata
        frag_index = metadata.get('X-Object-Sysmeta-Ec-Frag-Index',
                                  frag_index)
        df = self._simple_get_diskfile(obj=obj_name, policy=policy,
                                       frag_index=frag_index)
        df.open()

        if invalid_type == 'Zero-Byte':
            # opening for write truncates the data file to zero length
            fp = open(df._data_file, 'w')
            fp.close()
        df.unit_test_len = fsize
        return df

    def test_keep_cache(self):
        # Check whether drop_buffer_cache is called while a reader is
        # consumed, for each keep_cache setting and for a larger file.
        def cache_dropped(df, **reader_kwargs):
            with mock.patch(
                    "swift.obj.diskfile.drop_buffer_cache") as dropper:
                for _ in df.reader(**reader_kwargs):
                    pass
                return dropper.called

        # default reader
        small = self._get_open_disk_file(fsize=65)
        self.assertTrue(cache_dropped(small))

        # keep_cache explicitly off
        small = self._get_open_disk_file(fsize=65)
        self.assertTrue(cache_dropped(small, keep_cache=False))

        # keep_cache on, 65 byte file
        small = self._get_open_disk_file(fsize=65)
        self.assertFalse(cache_dropped(small, keep_cache=True))

        # keep_cache on, 5 KiB file read in 256 byte chunks
        large = self._get_open_disk_file(fsize=5 * 1024, csize=256)
        self.assertTrue(cache_dropped(large, keep_cache=True))

    def test_quarantine_valids(self):
        # undamaged objects must read back without being quarantined,
        # whatever the chunk size

        def verify(*args, **kwargs):
            try:
                reader = self._get_open_disk_file(**kwargs).reader()
                for _ in reader:
                    pass
            except DiskFileQuarantined:
                self.fail(
                    "Unexpected quarantining occurred: args=%r, kwargs=%r" % (
                        args, kwargs))

        cases = (
            {'obj_name': '1'},
            {'obj_name': '2', 'csize': 1},
            {'obj_name': '3', 'csize': 100000},
        )
        for case in cases:
            verify(**case)

    def run_quarantine_invalids(self, invalid_type):
        # Shared driver for the test_quarantine_* cases: build a damaged
        # object of the given kind and read it fully and by range.

        # damage kinds expected to raise DiskFileQuarantined on open
        open_exc = invalid_type in ('Content-Length', 'Bad-Content-Length',
                                    'Corrupt-Xattrs', 'Truncated-Xattrs',
                                    'Missing-Name', 'Bad-X-Delete-At')
        # the one damage kind expected to raise DiskFileCollision on open
        open_collision = invalid_type == 'Bad-Name'

        def open_reader(params, **reader_kwargs):
            """
            Return (df, reader), or None when opening raised the error
            that is expected for this invalid_type.
            """
            try:
                df = self._get_open_disk_file(**params)
                reader = df.reader(**reader_kwargs)
            except DiskFileQuarantined as err:
                if not open_exc:
                    self.fail(
                        "Unexpected DiskFileQuarantine raised: %r" % err)
                return None
            except DiskFileCollision as err:
                if not open_collision:
                    self.fail(
                        "Unexpected DiskFileCollision raised: %r" % err)
                return None
            if open_exc:
                self.fail("Expected DiskFileQuarantine exception")
            return df, reader

        def verify(*args, **kwargs):
            quarantine_msgs = []
            opened = open_reader(
                kwargs, _quarantine_hook=quarantine_msgs.append)
            if opened is None:
                return
            reader = opened[1]
            try:
                for _ in reader:
                    pass
            except DiskFileQuarantined as err:
                self.fail("Unexpected DiskFileQuarantine raised: :%r" % err)
            else:
                if not open_exc:
                    self.assertEqual(1, len(quarantine_msgs))

        verify(invalid_type=invalid_type, obj_name='1')

        verify(invalid_type=invalid_type, obj_name='2', csize=1)

        verify(invalid_type=invalid_type, obj_name='3', csize=100000)

        verify(invalid_type=invalid_type, obj_name='4')

        def verify_air(params, start=0, adjustment=0):
            """verify (a)pp (i)ter (r)ange"""
            opened = open_reader(params)
            if opened is None:
                return
            df, reader = opened
            stop = df.unit_test_len + adjustment
            try:
                for _ in reader.app_iter_range(start, stop):
                    pass
            except DiskFileQuarantined as err:
                self.fail("Unexpected DiskFileQuarantine raised: :%r" % err)

        verify_air(dict(invalid_type=invalid_type, obj_name='5'))

        verify_air(dict(invalid_type=invalid_type, obj_name='6'), 0, 100)

        verify_air(dict(invalid_type=invalid_type, obj_name='7'), 1)

        verify_air(dict(invalid_type=invalid_type, obj_name='8'), 0, -1)

        verify_air(dict(invalid_type=invalid_type, obj_name='8'), 1, 1)

    def test_quarantine_corrupt_xattrs(self):
        # metadata xattr whose first byte has been replaced
        self.run_quarantine_invalids(invalid_type='Corrupt-Xattrs')

    def test_quarantine_truncated_xattrs(self):
        # metadata xattr with its last byte removed
        self.run_quarantine_invalids(invalid_type='Truncated-Xattrs')

    def test_quarantine_invalid_etag(self):
        # stored ETag that does not match the written body
        self.run_quarantine_invalids(invalid_type='ETag')

    def test_quarantine_invalid_missing_name(self):
        # metadata with the 'name' key deleted
        self.run_quarantine_invalids(invalid_type='Missing-Name')

    def test_quarantine_invalid_bad_name(self):
        # metadata 'name' with 'garbage' appended
        self.run_quarantine_invalids(invalid_type='Bad-Name')

    def test_quarantine_invalid_bad_x_delete_at(self):
        # X-Delete-At that is not an integer
        self.run_quarantine_invalids(invalid_type='Bad-X-Delete-At')

    def test_quarantine_invalid_content_length(self):
        # Content-Length one byte short of the written size
        self.run_quarantine_invalids(invalid_type='Content-Length')

    def test_quarantine_invalid_content_length_bad(self):
        # Content-Length that is not a number
        self.run_quarantine_invalids(invalid_type='Bad-Content-Length')

    def test_quarantine_invalid_zero_byte(self):
        # data file truncated to zero bytes after it was opened
        self.run_quarantine_invalids(invalid_type='Zero-Byte')

    def test_quarantine_deleted_files(self):
        # A bad Content-Length quarantines a live object, but once the
        # object has been deleted the open must report "not exist".
        def open_bad_length(**extra):
            return self._get_open_disk_file(invalid_type='Content-Length',
                                            **extra)

        try:
            open_bad_length()
        except DiskFileQuarantined:
            pass
        else:
            self.fail("Expected DiskFileQuarantined exception")

        try:
            open_bad_length(mark_deleted=True)
        except DiskFileQuarantined as err:
            self.fail("Unexpected DiskFileQuarantined exception"
                      " encountered: %r" % err)
        except DiskFileNotExist:
            pass
        else:
            self.fail("Expected DiskFileNotExist exception")

        # and the same again on a second attempt
        try:
            open_bad_length(mark_deleted=True)
        except DiskFileNotExist:
            pass
        else:
            self.fail("Expected DiskFileNotExist exception")

    def test_quarantine_missing_content_length(self):
        # metadata without any Content-Length must be quarantined on open
        with self.assertRaises(DiskFileQuarantined):
            self._get_open_disk_file(invalid_type='Missing-Content-Length')

    def test_quarantine_bad_content_length(self):
        # a non-numeric Content-Length must be quarantined on open
        with self.assertRaises(DiskFileQuarantined):
            self._get_open_disk_file(invalid_type='Bad-Content-Length')

    def test_quarantine_fstat_oserror(self):
        # an OSError from the fourth os.fstat call made while building and
        # opening the object must end in a quarantine
        real_fstat = os.fstat
        call_numbers = itertools.count(1)

        def failing_fstat(fd):
            if next(call_numbers) == 4:
                # FIXME - yes, this an icky way to get code coverage ... worth
                # it?
                raise OSError()
            return real_fstat(fd)

        with mock.patch('os.fstat', failing_fstat):
            with self.assertRaises(DiskFileQuarantined):
                self._get_open_disk_file()

    def test_quarantine_hashdir_not_a_directory(self):
        # replace the object's hash dir with an empty regular file
        written = self._create_test_file('1234567890', account="abc",
                                         container='123', obj='xyz')
        hashdir = written._datadir
        suffix_dir = os.path.dirname(hashdir)
        rmtree(hashdir)
        open(hashdir, 'w').close()

        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)
        with self.assertRaises(DiskFileQuarantined):
            df.open()

        # make sure the right thing got quarantined; the suffix dir should not
        # have moved, as that could have many objects in it
        self.assertFalse(os.path.exists(hashdir))
        self.assertTrue(os.path.exists(suffix_dir))

    def test_create_prealloc(self):
        # create(size=200) is expected to call fallocate with the writer's
        # file descriptor and the requested size.
        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)
        fallocate_patch = mock.patch("swift.obj.diskfile.fallocate")
        with fallocate_patch as mock_fallocate:
            with df.create(size=200) as writer:
                writer_fd = writer._fd
        mock_fallocate.assert_called_with(writer_fd, 200)

    def test_create_prealloc_oserror(self):
        # An OSError from fallocate with ENOSPC or EDQUOT must surface from
        # create() as DiskFileNoSpace; an EACCES error must stay an OSError.
        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)

        def create_with_failing_fallocate(err_no):
            # run create() while fallocate raises OSError(err_no)
            failure = OSError(err_no, os.strerror(err_no))
            with mock.patch("swift.obj.diskfile.fallocate",
                            mock.MagicMock(side_effect=failure)):
                with df.create(size=200):
                    pass

        for err_no in (errno.ENOSPC, errno.EDQUOT):
            try:
                create_with_failing_fallocate(err_no)
            except DiskFileNoSpace:
                pass
            else:
                self.fail("Expected exception DiskFileNoSpace")

        # Other OSErrors must not be raised as DiskFileNoSpace
        try:
            create_with_failing_fallocate(errno.EACCES)
        except OSError:
            pass
        else:
            self.fail("Expected exception OSError")

    def test_create_mkstemp_no_space(self):
        # An OSError from mkstemp with ENOSPC or EDQUOT must surface from
        # create() as DiskFileNoSpace; an EACCES error must stay an OSError.
        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)

        def create_with_failing_mkstemp(err_no):
            # run create() while mkstemp raises OSError(err_no)
            failure = OSError(err_no, os.strerror(err_no))
            with mock.patch("swift.obj.diskfile.mkstemp",
                            mock.MagicMock(side_effect=failure)):
                with df.create(size=200):
                    pass

        for err_no in (errno.ENOSPC, errno.EDQUOT):
            try:
                create_with_failing_mkstemp(err_no)
            except DiskFileNoSpace:
                pass
            else:
                self.fail("Expected exception DiskFileNoSpace")

        # Other OSErrors must not be raised as DiskFileNoSpace
        try:
            create_with_failing_mkstemp(errno.EACCES)
        except OSError:
            pass
        else:
            self.fail("Expected exception OSError")

    def test_create_close_oserror(self):
        # An OSError raised by os.close (as seen from swift.obj.diskfile)
        # must not escape the create() context manager.
        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)
        close_failure = OSError(errno.EACCES, os.strerror(errno.EACCES))
        failing_close = mock.MagicMock(side_effect=close_failure)
        with mock.patch("swift.obj.diskfile.os.close", failing_close):
            try:
                with df.create(size=200):
                    pass
            except Exception as err:
                self.fail("Unexpected exception raised: %r" % err)

    def test_write_metadata(self):
        # write_metadata() should add exactly one file, named
        # <timestamp>.meta, to the object's data directory.
        df = self._create_test_file('1234567890')
        files_before = len(os.listdir(df._datadir))
        ts_internal = Timestamp(time()).internal
        df.write_metadata({'X-Timestamp': ts_internal,
                           'X-Object-Meta-test': 'data'})
        listing = os.listdir(df._datadir)
        self.assertEqual(len(listing), files_before + 1)
        self.assertIn('%s.meta' % ts_internal, set(listing))

    def test_write_metadata_with_content_type(self):
        # if metadata has content-type then its time should be in file name;
        # with equal metadata and content-type timestamps the expected name
        # is '<timestamp>+0.meta'
        df = self._create_test_file('1234567890')
        files_before = len(os.listdir(df._datadir))
        now = Timestamp(time())
        df.write_metadata({'X-Timestamp': now.internal,
                           'X-Object-Meta-test': 'data',
                           'Content-Type': 'foo',
                           'Content-Type-Timestamp': now.internal})
        listing = os.listdir(df._datadir)
        self.assertEqual(len(listing), files_before + 1)
        meta_name = '%s+0.meta' % now.internal
        found = meta_name in set(listing)
        self.assertTrue(
            found,
            'Expected file %s not found in %s' % (meta_name, listing))

    def test_write_metadata_with_older_content_type(self):
        # if metadata has content-type then its time should be in file name;
        # here the content-type timestamp is older than the metadata
        # timestamp, so the expected name carries the hex difference of the
        # two raw timestamps
        ts_iter = make_timestamp_iter()
        # use the next() builtin: iterator.next() only exists on Python 2
        df = self._create_test_file('1234567890', timestamp=next(ts_iter))
        file_count = len(os.listdir(df._datadir))
        timestamp = next(ts_iter)
        timestamp2 = next(ts_iter)
        metadata = {'X-Timestamp': timestamp2.internal,
                    'X-Object-Meta-test': 'data',
                    'Content-Type': 'foo',
                    'Content-Type-Timestamp': timestamp.internal}
        df.write_metadata(metadata)
        dl = os.listdir(df._datadir)
        # exactly one new file is expected
        self.assertEqual(len(dl), file_count + 1, dl)
        exp_name = '%s-%x.meta' % (timestamp2.internal,
                                   timestamp2.raw - timestamp.raw)
        self.assertIn(exp_name, dl,
                      'Expected file %s not found in %s' % (exp_name, dl))

    def test_write_metadata_with_content_type_removes_same_time_meta(self):
        # a meta file without content-type should be cleaned up in favour of
        # a meta file at same time with content-type
        ts_iter = make_timestamp_iter()
        # use the next() builtin: iterator.next() only exists on Python 2
        df = self._create_test_file('1234567890', timestamp=next(ts_iter))
        file_count = len(os.listdir(df._datadir))
        timestamp = next(ts_iter)
        timestamp2 = next(ts_iter)
        # first write: metadata at timestamp2 without any content-type
        metadata = {'X-Timestamp': timestamp2.internal,
                    'X-Object-Meta-test': 'data'}
        df.write_metadata(metadata)
        # second write: same X-Timestamp, now with an older content-type
        metadata = {'X-Timestamp': timestamp2.internal,
                    'X-Object-Meta-test': 'data',
                    'Content-Type': 'foo',
                    'Content-Type-Timestamp': timestamp.internal}
        df.write_metadata(metadata)

        dl = os.listdir(df._datadir)
        # only one meta file should remain alongside the original files
        self.assertEqual(len(dl), file_count + 1, dl)
        exp_name = '%s-%x.meta' % (timestamp2.internal,
                                   timestamp2.raw - timestamp.raw)
        self.assertIn(exp_name, dl,
                      'Expected file %s not found in %s' % (exp_name, dl))

    def test_write_metadata_with_content_type_removes_multiple_metas(self):
        # a combination of a meta file without content-type and an older meta
        # file with content-type should be cleaned up in favour of a meta file
        # at newer time with content-type
        ts_iter = make_timestamp_iter()
        # use the next() builtin: iterator.next() only exists on Python 2
        df = self._create_test_file('1234567890', timestamp=next(ts_iter))
        file_count = len(os.listdir(df._datadir))
        timestamp = next(ts_iter)
        timestamp2 = next(ts_iter)
        # newer metadata without content-type
        metadata = {'X-Timestamp': timestamp2.internal,
                    'X-Object-Meta-test': 'data'}
        df.write_metadata(metadata)
        # older metadata carrying a content-type
        metadata = {'X-Timestamp': timestamp.internal,
                    'X-Object-Meta-test': 'data',
                    'Content-Type': 'foo',
                    'Content-Type-Timestamp': timestamp.internal}
        df.write_metadata(metadata)

        # at this point both meta files are expected to be present
        dl = os.listdir(df._datadir)
        self.assertEqual(len(dl), file_count + 2, dl)

        # newer metadata with the (older) content-type replaces both
        metadata = {'X-Timestamp': timestamp2.internal,
                    'X-Object-Meta-test': 'data',
                    'Content-Type': 'foo',
                    'Content-Type-Timestamp': timestamp.internal}
        df.write_metadata(metadata)

        dl = os.listdir(df._datadir)
        self.assertEqual(len(dl), file_count + 1, dl)
        exp_name = '%s-%x.meta' % (timestamp2.internal,
                                   timestamp2.raw - timestamp.raw)
        self.assertIn(exp_name, dl,
                      'Expected file %s not found in %s' % (exp_name, dl))

    def test_write_metadata_no_xattr(self):
        # When xattr.setxattr fails with ENOTSUP (or EOPNOTSUPP where errno
        # has no ENOTSUP), diskfile.write_metadata must raise
        # DiskFileXattrNotSupported.
        metadata = {'X-Timestamp': Timestamp(time()).internal,
                    'X-Object-Meta-test': 'data'}

        def unsupported_setxattr(*args, **kwargs):
            if hasattr(errno, 'ENOTSUP'):
                err_no = errno.ENOTSUP
            else:
                err_no = errno.EOPNOTSUPP
            raise IOError(err_no, "Operation not supported")

        with mock.patch('xattr.setxattr', unsupported_setxattr):
            with self.assertRaises(DiskFileXattrNotSupported):
                diskfile.write_metadata('n/a', metadata)

    def test_write_metadata_disk_full(self):
        # An IOError with ENOSPC or EDQUOT from xattr.setxattr must be
        # raised by diskfile.write_metadata as DiskFileNoSpace.
        metadata = {'X-Timestamp': Timestamp(time()).internal,
                    'X-Object-Meta-test': 'data'}

        def make_failing_setxattr(err_no, message):
            # build a setxattr replacement that always raises IOError
            def failing_setxattr(*args, **kwargs):
                raise IOError(err_no, message)
            return failing_setxattr

        failures = (
            (errno.ENOSPC, "No space left on device"),
            (errno.EDQUOT, "Exceeded quota"),
        )
        for err_no, message in failures:
            with mock.patch('xattr.setxattr',
                            make_failing_setxattr(err_no, message)):
                with self.assertRaises(DiskFileNoSpace):
                    diskfile.write_metadata('n/a', metadata)

    def _create_diskfile_dir(self, timestamp, policy):
        # Put and commit a zero Content-Length object 'a/c/o_<policy>' at the
        # given timestamp under the given policy, and return the writer's
        # _datadir.
        ts = Timestamp(timestamp)
        df = self._simple_get_diskfile(account='a', container='c',
                                       obj='o_%s' % policy,
                                       policy=policy)
        is_ec = policy.policy_type == EC_POLICY

        with df.create() as writer:
            metadata = {
                'ETag': 'bogus_etag',
                'X-Timestamp': ts.internal,
                'Content-Length': '0',
            }
            if is_ec:
                # fall back to fragment index 7 when the diskfile's own
                # _frag_index is falsy
                frag_index = df._frag_index or 7
                metadata['X-Object-Sysmeta-Ec-Frag-Index'] = frag_index
            writer.put(metadata)
            writer.commit(ts)
        return writer._datadir

    def test_commit(self):
        # After put + commit the datadir should hold just the .data file, or
        # for EC policies the '#2' .data file plus a .durable file.
        for policy in POLICIES:
            # create first fileset as starting state
            ts_internal = Timestamp(time()).internal
            datadir = self._create_diskfile_dir(ts_internal, policy)
            listing = os.listdir(datadir)
            if policy.policy_type == EC_POLICY:
                expected = ['%s#2.data' % ts_internal,
                            '%s.durable' % ts_internal]
            else:
                expected = ['%s.data' % ts_internal]
            self.assertEqual(len(listing), len(expected),
                             'Unexpected dir listing %s' % listing)
            self.assertEqual(sorted(expected), sorted(listing))

    def test_write_cleanup(self):
        # A second, newer write to the same object should leave only the
        # newer fileset in the datadir.
        for policy in POLICIES:
            # create first fileset as starting state
            first_ts = Timestamp(time()).internal
            first_dir = self._create_diskfile_dir(first_ts, policy)
            # second write should clean up first fileset
            second_ts = Timestamp(time() + 1).internal
            second_dir = self._create_diskfile_dir(second_ts, policy)
            # sanity check
            self.assertEqual(first_dir, second_dir)
            listing = os.listdir(second_dir)
            if policy.policy_type == EC_POLICY:
                expected = ['%s#2.data' % second_ts,
                            '%s.durable' % second_ts]
            else:
                expected = ['%s.data' % second_ts]
            self.assertEqual(len(listing), len(expected),
                             'Unexpected dir listing %s' % listing)
            self.assertEqual(sorted(expected), sorted(listing))

    def test_commit_fsync(self):
        # commit() is expected to call fsync once (with an int argument) for
        # EC policies and not at all for replication policies.
        for policy in POLICIES:
            fsync_spy = mock.MagicMock()
            df = self._simple_get_diskfile(account='a', container='c',
                                           obj='o', policy=policy)

            now = Timestamp(time())
            with df.create() as writer:
                writer.put({
                    'ETag': 'bogus_etag',
                    'X-Timestamp': now.internal,
                    'Content-Length': '0',
                })
                # only the commit step runs with fsync patched
                with mock.patch('swift.obj.diskfile.fsync', fsync_spy):
                    writer.commit(now)
            fsync_calls_by_type = {
                EC_POLICY: 1,
                REPL_POLICY: 0,
            }
            self.assertEqual(fsync_calls_by_type[policy.policy_type],
                             fsync_spy.call_count)
            if policy.policy_type == EC_POLICY:
                first_arg = fsync_spy.call_args[0][0]
                self.assertTrue(isinstance(first_arg, int))

    def test_commit_ignores_cleanup_ondisk_files_error(self):
        # commit() must complete, leaving the expected files on disk, even
        # when cleanup_ondisk_files raises OSError.
        for policy in POLICIES:
            # Check OSError from cleanup_ondisk_files is caught and ignored
            failing_cleanup = mock.MagicMock(side_effect=OSError)
            df = self._simple_get_diskfile(account='a', container='c',
                                           obj='o_error', policy=policy)

            now = Timestamp(time())
            with df.create() as writer:
                writer.put({
                    'ETag': 'bogus_etag',
                    'X-Timestamp': now.internal,
                    'Content-Length': '0',
                })
                patch_target = self._manager_mock('cleanup_ondisk_files', df)
                with mock.patch(patch_target, failing_cleanup):
                    writer.commit(now)
            cleanup_calls_by_type = {
                EC_POLICY: 1,
                REPL_POLICY: 0,
            }
            self.assertEqual(cleanup_calls_by_type[policy.policy_type],
                             failing_cleanup.call_count)
            if policy.policy_type == EC_POLICY:
                expected_files = ['%s#2.data' % now.internal,
                                  '%s.durable' % now.internal]
            else:
                expected_files = ['%s.data' % now.internal]
            listing = os.listdir(df._datadir)
            self.assertEqual(len(listing), len(expected_files),
                             'Unexpected dir listing %s' % listing)
            self.assertEqual(sorted(expected_files), sorted(listing))

    def test_number_calls_to_cleanup_ondisk_files_during_create(self):
        # Check how many calls are made to cleanup_ondisk_files, and when,
        # during put(), commit() sequence

        def patch_cleanup(target_df):
            # fresh patch (and therefore fresh call count) on each use
            return mock.patch(
                self._manager_mock('cleanup_ondisk_files', target_df))

        for policy in POLICIES:
            # expected call counts: (during put, during commit)
            put_calls, commit_calls = {
                EC_POLICY: (0, 1),
                REPL_POLICY: (1, 0),
            }[policy.policy_type]
            df = self._simple_get_diskfile(account='a', container='c',
                                           obj='o_error', policy=policy)
            now = Timestamp(time())
            with df.create() as writer:
                metadata = {
                    'ETag': 'bogus_etag',
                    'X-Timestamp': now.internal,
                    'Content-Length': '0',
                }
                with patch_cleanup(df) as cleanup_spy:
                    writer.put(metadata)
                    self.assertEqual(put_calls, cleanup_spy.call_count)
                with patch_cleanup(df) as cleanup_spy:
                    writer.commit(now)
                    self.assertEqual(commit_calls, cleanup_spy.call_count)

    def test_number_calls_to_cleanup_ondisk_files_during_delete(self):
        # Check how many calls are made to cleanup_ondisk_files, and when,
        # for delete() and necessary prerequisite steps

        def patch_cleanup(target_df):
            # fresh patch (and therefore fresh call count) on each use
            return mock.patch(
                self._manager_mock('cleanup_ondisk_files', target_df))

        for policy in POLICIES:
            # expected call counts: (during put, during commit, during delete)
            put_calls, commit_calls, delete_calls = {
                EC_POLICY: (0, 1, 1),
                REPL_POLICY: (1, 0, 1),
            }[policy.policy_type]
            df = self._simple_get_diskfile(account='a', container='c',
                                           obj='o_error', policy=policy)
            now = Timestamp(time())
            with df.create() as writer:
                metadata = {
                    'ETag': 'bogus_etag',
                    'X-Timestamp': now.internal,
                    'Content-Length': '0',
                }
                with patch_cleanup(df) as cleanup_spy:
                    writer.put(metadata)
                    self.assertEqual(put_calls, cleanup_spy.call_count)
                with patch_cleanup(df) as cleanup_spy:
                    writer.commit(now)
                    self.assertEqual(commit_calls, cleanup_spy.call_count)
                with patch_cleanup(df) as cleanup_spy:
                    # the delete is issued while still inside create()
                    df.delete(Timestamp(time()))
                    self.assertEqual(delete_calls, cleanup_spy.call_count)

    def test_delete(self):
        # delete() should leave a single <timestamp>.ts file in the datadir
        for policy in POLICIES:
            is_ec = policy.policy_type == EC_POLICY
            frag_index = 1 if is_ec else None
            extra = {'X-Object-Sysmeta-Ec-Frag-Index': '1'} if is_ec else {}
            df = self._get_open_disk_file(policy=policy,
                                          frag_index=frag_index,
                                          extra_metadata=extra)

            delete_ts = Timestamp(time())
            df.delete(delete_ts)
            tombstone = '%s.ts' % delete_ts.internal
            listing = os.listdir(df._datadir)
            self.assertEqual(len(listing), 1)
            self.assertIn(tombstone, set(listing))
            # cleanup before next policy
            os.unlink(os.path.join(df._datadir, tombstone))

    def test_open_deleted(self):
        # After delete() only the tombstone remains, and opening a fresh
        # diskfile for the object raises DiskFileDeleted.
        df = self._get_open_disk_file()
        delete_time = time()
        df.delete(delete_time)
        tombstone = '%s.ts' % str(Timestamp(delete_time).internal)
        listing = os.listdir(df._datadir)
        self.assertEqual(len(listing), 1)
        self.assertIn(tombstone, set(listing))
        reopened = self._simple_get_diskfile()
        with self.assertRaises(DiskFileDeleted):
            reopened.open()

    def test_open_deleted_with_corrupt_tombstone(self):
        # A tombstone whose metadata xattr has been truncated should make
        # open() raise DiskFileNotExist and should be removed from disk.
        meta_key = "user.swift.metadata"
        df = self._get_open_disk_file()
        delete_time = time()
        df.delete(delete_time)
        tombstone = '%s.ts' % str(Timestamp(delete_time).internal)
        listing = os.listdir(df._datadir)
        self.assertEqual(len(listing), 1)
        self.assertIn(tombstone, set(listing))
        # it's pickle-format, so removing the last byte is sufficient to
        # corrupt it
        tombstone_path = os.path.join(df._datadir, tombstone)
        self.assertTrue(os.path.exists(tombstone_path))  # sanity check
        raw_meta = xattr.getxattr(tombstone_path, meta_key)
        xattr.setxattr(tombstone_path, meta_key, raw_meta[:-1])

        reopened = self._simple_get_diskfile()
        with self.assertRaises(DiskFileNotExist):
            reopened.open()
        self.assertFalse(os.path.exists(tombstone_path))

    def test_from_audit_location(self):
        # A diskfile built from an AuditLocation pointing at an existing
        # hash dir should open and report the object's name.
        created = self._create_test_file(
            'blah blah',
            account='three', container='blind', obj='mice')
        location = diskfile.AuditLocation(
            created._datadir, self.existing_device, '0',
            policy=POLICIES.default)
        df = self.df_mgr.get_diskfile_from_audit_location(location)
        df.open()
        self.assertEqual(df._name, '/three/blind/mice')

    def test_from_audit_location_with_mismatched_hash(self):
        # Rewrite the stored 'name' metadata of the .data file to a different
        # object path; opening via the audit location must then raise
        # DiskFileQuarantined.
        created = self._create_test_file(
            'blah blah',
            account='this', container='is', obj='right')
        hashdir = created._datadir
        data_files = [name for name in os.listdir(hashdir)
                      if name.endswith('.data')]
        datafile = os.path.join(hashdir, data_files[0])
        stored_meta = diskfile.read_metadata(datafile)
        stored_meta['name'] = '/this/is/wrong'
        diskfile.write_metadata(datafile, stored_meta)

        location = diskfile.AuditLocation(
            hashdir, self.existing_device, '0', policy=POLICIES.default)
        df = self.df_mgr.get_diskfile_from_audit_location(location)
        with self.assertRaises(DiskFileQuarantined):
            df.open()

    def test_close_error(self):
        # An exception raised by the reader's _handle_close_quarantine hook
        # is expected to be logged as a single error line, with reader._fp
        # still ending up as None.

        def failing_close_quarantine():
            raise Exception("Bad")

        df = self._get_open_disk_file(fsize=1024 * 1024 * 2, csize=1024)
        reader = df.reader()
        reader._handle_close_quarantine = failing_close_quarantine
        for _ in reader:
            pass
        # close is called at the end of the iterator
        self.assertEqual(reader._fp, None)
        errors = df._logger.get_lines_for_level('error')
        self.assertEqual(len(errors), 1)
        for fragment in ('close failure', 'Bad'):
            self.assertIn(fragment, errors[0])

    def test_mount_checking(self):
        # With check_mount patched to always return False, opening a disk
        # file with mount_check=True must raise DiskFileDeviceUnavailable.

        def never_mounted(*args, **kwargs):
            return False

        with mock.patch("swift.common.constraints.check_mount",
                        never_mounted):
            with self.assertRaises(DiskFileDeviceUnavailable):
                self._get_open_disk_file(mount_check=True)

    def test_ondisk_search_loop_ts_meta_data(self):
        # Tombstones are the newest files on disk (.ts > .meta > .data):
        # open() must raise DiskFileDeleted with the newest .ts timestamp.
        df = self._simple_get_diskfile()
        # (data, ext, timestamp), created in this order
        layout = (
            ('', '.ts', 10),
            ('', '.ts', 9),
            ('', '.meta', 8),
            ('', '.meta', 7),
            ('B', '.data', 6),
            ('A', '.data', 5),
        )
        for data, ext, ts in layout:
            self._create_ondisk_file(df, data, ext=ext, timestamp=ts)
        df = self._simple_get_diskfile()
        raised = None
        try:
            df.open()
        except DiskFileDeleted as err:
            raised = err
        if raised is None:
            self.fail("Expected DiskFileDeleted exception")
        self.assertEqual(raised.timestamp, Timestamp(10).internal)

    def test_ondisk_search_loop_meta_ts_data(self):
        # Files ordered .meta > .ts > .data: open() must raise
        # DiskFileDeleted carrying the newest .ts timestamp (8), not the
        # newer .meta timestamp.
        df = self._simple_get_diskfile()
        # (data, ext, timestamp), created in this order
        layout = (
            ('', '.meta', 10),
            ('', '.meta', 9),
            ('', '.ts', 8),
            ('', '.ts', 7),
            ('B', '.data', 6),
            ('A', '.data', 5),
        )
        for data, ext, ts in layout:
            self._create_ondisk_file(df, data, ext=ext, timestamp=ts)
        df = self._simple_get_diskfile()
        raised = None
        try:
            df.open()
        except DiskFileDeleted as err:
            raised = err
        if raised is None:
            self.fail("Expected DiskFileDeleted exception")
        self.assertEqual(raised.timestamp, Timestamp(8).internal)

    def test_ondisk_search_loop_meta_data_ts(self):
        # Files ordered .meta > .data > .ts: the object opens, reports the
        # newest .meta timestamp and is not flagged as deleted.
        df = self._simple_get_diskfile()
        # (data, ext, timestamp), created in this order
        layout = [
            ('', '.meta', 10),
            ('', '.meta', 9),
            ('B', '.data', 8),
            ('A', '.data', 7),
        ]
        if df.policy.policy_type == EC_POLICY:
            layout += [('', '.durable', 8), ('', '.durable', 7)]
        layout += [('', '.ts', 6), ('', '.ts', 5)]
        for data, ext, ts in layout:
            self._create_ondisk_file(df, data, ext=ext, timestamp=ts)
        df = self._simple_get_diskfile()
        with df.open():
            opened_meta = df._metadata
            self.assertIn('X-Timestamp', opened_meta)
            self.assertEqual(opened_meta['X-Timestamp'],
                             Timestamp(10).internal)
            self.assertNotIn('deleted', opened_meta)

    def test_ondisk_search_loop_multiple_meta_data(self):
        # Two .meta files over two .data files: the opened metadata should
        # take X-Timestamp and user meta from the newest .meta (10) and
        # Content-Type from the .meta written with ctype_timestamp=9.
        df = self._simple_get_diskfile()
        # (data, keyword arguments), created in this order
        layout = [
            ('', dict(ext='.meta', timestamp=10,
                      metadata={'X-Object-Meta-User': 'user-meta'})),
            ('', dict(ext='.meta', timestamp=9,
                      ctype_timestamp=9,
                      metadata={'Content-Type': 'newest',
                                'X-Object-Meta-User': 'blah'})),
            ('B', dict(ext='.data', timestamp=8,
                       metadata={'Content-Type': 'newer'})),
            ('A', dict(ext='.data', timestamp=7,
                       metadata={'Content-Type': 'oldest'})),
        ]
        if df.policy.policy_type == EC_POLICY:
            layout.append(('', dict(ext='.durable', timestamp=8)))
            layout.append(('', dict(ext='.durable', timestamp=7)))
        for data, kwargs in layout:
            self._create_ondisk_file(df, data, **kwargs)
        df = self._simple_get_diskfile()
        with df.open():
            opened_meta = df._metadata
            self.assertIn('X-Timestamp', opened_meta)
            self.assertEqual(opened_meta['X-Timestamp'],
                             Timestamp(10).internal)
            self.assertIn('Content-Type', opened_meta)
            self.assertEqual(opened_meta['Content-Type'], 'newest')
            self.assertIn('X-Object-Meta-User', opened_meta)
            self.assertEqual(opened_meta['X-Object-Meta-User'], 'user-meta')

    def test_ondisk_search_loop_stale_meta_data(self):
        # The .meta at timestamp 9 carries a content-type timestamp (7) older
        # than the .data file (8): the opened metadata should keep the .data
        # file's Content-Type while taking X-Timestamp and user meta from the
        # newest .meta (10).
        df = self._simple_get_diskfile()
        # (data, keyword arguments), created in this order
        layout = [
            ('', dict(ext='.meta', timestamp=10,
                      metadata={'X-Object-Meta-User': 'user-meta'})),
            ('', dict(ext='.meta', timestamp=9,
                      ctype_timestamp=7,
                      metadata={'Content-Type': 'older',
                                'X-Object-Meta-User': 'blah'})),
            ('B', dict(ext='.data', timestamp=8,
                       metadata={'Content-Type': 'newer'})),
        ]
        if df.policy.policy_type == EC_POLICY:
            layout.append(('', dict(ext='.durable', timestamp=8)))
        for data, kwargs in layout:
            self._create_ondisk_file(df, data, **kwargs)
        df = self._simple_get_diskfile()
        with df.open():
            opened_meta = df._metadata
            self.assertIn('X-Timestamp', opened_meta)
            self.assertEqual(opened_meta['X-Timestamp'],
                             Timestamp(10).internal)
            self.assertIn('Content-Type', opened_meta)
            self.assertEqual(opened_meta['Content-Type'], 'newer')
            self.assertIn('X-Object-Meta-User', opened_meta)
            self.assertEqual(opened_meta['X-Object-Meta-User'], 'user-meta')

    def test_ondisk_search_loop_data_ts_meta(self):
        # Files ordered .data > .ts > .meta: the object opens with the
        # newest .data timestamp and is not flagged as deleted.
        df = self._simple_get_diskfile()
        # (data, ext, timestamp), created in this order
        layout = [('B', '.data', 10), ('A', '.data', 9)]
        if df.policy.policy_type == EC_POLICY:
            layout += [('', '.durable', 10), ('', '.durable', 9)]
        layout += [
            ('', '.ts', 8),
            ('', '.ts', 7),
            ('', '.meta', 6),
            ('', '.meta', 5),
        ]
        for data, ext, ts in layout:
            self._create_ondisk_file(df, data, ext=ext, timestamp=ts)
        df = self._simple_get_diskfile()
        with df.open():
            opened_meta = df._metadata
            self.assertIn('X-Timestamp', opened_meta)
            self.assertEqual(opened_meta['X-Timestamp'],
                             Timestamp(10).internal)
            self.assertNotIn('deleted', opened_meta)

    def test_ondisk_search_loop_wayward_files_ignored(self):
        # A newer file with an unrecognised '.bar' extension must not affect
        # the result: the object opens with the newest .data timestamp.
        df = self._simple_get_diskfile()
        # (data, ext, timestamp), created in this order
        layout = [
            ('X', '.bar', 11),
            ('B', '.data', 10),
            ('A', '.data', 9),
        ]
        if df.policy.policy_type == EC_POLICY:
            layout += [('', '.durable', 10), ('', '.durable', 9)]
        layout += [
            ('', '.ts', 8),
            ('', '.ts', 7),
            ('', '.meta', 6),
            ('', '.meta', 5),
        ]
        for data, ext, ts in layout:
            self._create_ondisk_file(df, data, ext=ext, timestamp=ts)
        df = self._simple_get_diskfile()
        with df.open():
            opened_meta = df._metadata
            self.assertIn('X-Timestamp', opened_meta)
            self.assertEqual(opened_meta['X-Timestamp'],
                             Timestamp(10).internal)
            self.assertNotIn('deleted', opened_meta)

    def test_ondisk_search_loop_listdir_error(self):
        # With os.listdir raising EACCES, open() must raise DiskFileError.
        df = self._simple_get_diskfile()

        def denied_listdir(*args, **kwargs):
            raise OSError(errno.EACCES, os.strerror(errno.EACCES))

        with mock.patch("os.listdir", denied_listdir):
            # the files are created while listdir is already patched
            # (data, ext, timestamp), created in this order
            layout = [
                ('X', '.bar', 11),
                ('B', '.data', 10),
                ('A', '.data', 9),
            ]
            if df.policy.policy_type == EC_POLICY:
                layout += [('', '.durable', 10), ('', '.durable', 9)]
            layout += [
                ('', '.ts', 8),
                ('', '.ts', 7),
                ('', '.meta', 6),
                ('', '.meta', 5),
            ]
            for data, ext, ts in layout:
                self._create_ondisk_file(df, data, ext=ext, timestamp=ts)
            df = self._simple_get_diskfile()
            with self.assertRaises(DiskFileError):
                df.open()

    def test_exception_in_handle_close_quarantine(self):
        # An exception raised from the reader's _handle_close_quarantine hook
        # should appear in the last logged error line.
        df = self._get_open_disk_file()

        def explode():
            raise Exception('a very special error')

        reader = df.reader()
        reader._handle_close_quarantine = explode
        # drain the reader, then close it explicitly as well
        for _ in reader:
            pass
        reader.close()
        last_error = df._logger.get_lines_for_level('error')[-1]
        self.assertIn('a very special error', last_error)

    def test_diskfile_names(self):
        # The default test diskfile exposes account 'a', container 'c' and
        # object 'o'.
        df = self._simple_get_diskfile()
        actual = (df.account, df.container, df.obj)
        self.assertEqual(actual, ('a', 'c', 'o'))

    def test_diskfile_content_length_not_open(self):
        # Reading content_length before open() must raise DiskFileNotOpen
        # with an empty message.  assertRaises reports a missing exception
        # directly, where comparing str(None) to '' gave an obscure failure.
        df = self._simple_get_diskfile()
        with self.assertRaises(DiskFileNotOpen) as caught:
            df.content_length
        self.assertEqual(str(caught.exception), '')

    def test_diskfile_content_length_deleted(self):
        # After delete() only the tombstone remains; opening the object to
        # read content_length must raise DiskFileDeleted with an empty
        # message.
        df = self._get_open_disk_file()
        ts = time()
        df.delete(ts)
        exp_name = '%s.ts' % str(Timestamp(ts).internal)
        dl = os.listdir(df._datadir)
        self.assertEqual(len(dl), 1)
        self.assertIn(exp_name, set(dl))
        df = self._simple_get_diskfile()
        # assertRaises reports a missing exception directly, where comparing
        # str(None) to '' gave an obscure failure
        with self.assertRaises(DiskFileDeleted) as caught:
            with df.open():
                df.content_length
        self.assertEqual(str(caught.exception), '')

    def test_diskfile_content_length(self):
        # The default test object reports a content_length of 1024 once
        # opened.
        self._get_open_disk_file()
        reopened = self._simple_get_diskfile()
        expected_length = 1024
        with reopened.open():
            self.assertEqual(reopened.content_length, expected_length)

    def test_diskfile_timestamp_not_open(self):
        # Reading timestamp before open() must raise DiskFileNotOpen with an
        # empty message.  assertRaises reports a missing exception directly,
        # where comparing str(None) to '' gave an obscure failure.
        df = self._simple_get_diskfile()
        with self.assertRaises(DiskFileNotOpen) as caught:
            df.timestamp
        self.assertEqual(str(caught.exception), '')

    def test_diskfile_timestamp_deleted(self):
        # After delete() only the tombstone remains; opening the object to
        # read timestamp must raise DiskFileDeleted with an empty message.
        df = self._get_open_disk_file()
        ts = time()
        df.delete(ts)
        exp_name = '%s.ts' % str(Timestamp(ts).internal)
        dl = os.listdir(df._datadir)
        self.assertEqual(len(dl), 1)
        self.assertIn(exp_name, set(dl))
        df = self._simple_get_diskfile()
        # assertRaises reports a missing exception directly, where comparing
        # str(None) to '' gave an obscure failure
        with self.assertRaises(DiskFileDeleted) as caught:
            with df.open():
                df.timestamp
        self.assertEqual(str(caught.exception), '')

    def test_diskfile_timestamp(self):
        # df.timestamp equals the data timestamp at first, and the metadata
        # timestamp after a later write_metadata().
        data_ts = self.ts()
        self._get_open_disk_file(ts=data_ts.internal)
        df = self._simple_get_diskfile()
        with df.open():
            self.assertEqual(df.timestamp, data_ts.internal)
        meta_ts = self.ts()
        df.write_metadata({'X-Timestamp': meta_ts.internal})
        with df.open():
            self.assertEqual(df.timestamp, meta_ts.internal)

    def test_data_timestamp(self):
        # data_timestamp reports the data file's X-Timestamp ...
        data_ts = self.ts()
        self._get_open_disk_file(ts=data_ts.internal)
        df = self._simple_get_diskfile()
        with df.open():
            self.assertEqual(df.data_timestamp, data_ts.internal)
        # ... and stays put when newer metadata is written
        meta_ts = self.ts()
        newer_metadata = {'X-Timestamp': meta_ts.internal}
        df.write_metadata(newer_metadata)
        with df.open():
            self.assertEqual(df.data_timestamp, data_ts.internal)

    def test_data_timestamp_not_open(self):
        # data_timestamp must raise DiskFileNotOpen on an unopened diskfile
        unopened = self._simple_get_diskfile()
        self.assertRaises(
            DiskFileNotOpen, getattr, unopened, 'data_timestamp')

    def test_content_type_and_timestamp(self):
        ts_1 = self.ts()
        self._get_open_disk_file(ts=ts_1.internal,
                                 extra_metadata={'Content-Type': 'image/jpeg'})
        df = self._simple_get_diskfile()

        def _check(data_ts, newest_ts, ctype_ts, ctype):
            # open the diskfile and compare all four reported properties
            with df.open():
                self.assertEqual(data_ts.internal, df.data_timestamp)
                self.assertEqual(newest_ts.internal, df.timestamp)
                self.assertEqual(ctype_ts.internal,
                                 df.content_type_timestamp)
                self.assertEqual(ctype, df.content_type)

        # with only a data file every timestamp is the data timestamp
        _check(ts_1, ts_1, ts_1, 'image/jpeg')

        ts_2 = self.ts()
        ts_3 = self.ts()
        # metadata update at ts_3 carrying a content-type dated ts_2
        df.write_metadata({'X-Timestamp': ts_3.internal,
                           'Content-Type': 'image/gif',
                           'Content-Type-Timestamp': ts_2.internal})
        _check(ts_1, ts_3, ts_2, 'image/gif')

    def test_content_type_timestamp_not_open(self):
        # content_type_timestamp must raise DiskFileNotOpen on an unopened
        # diskfile
        unopened = self._simple_get_diskfile()
        self.assertRaises(
            DiskFileNotOpen, getattr, unopened, 'content_type_timestamp')

    def test_content_type_not_open(self):
        # content_type must raise DiskFileNotOpen on an unopened diskfile
        unopened = self._simple_get_diskfile()
        self.assertRaises(DiskFileNotOpen, getattr, unopened, 'content_type')

    def test_durable_timestamp(self):
        data_ts = self.ts()
        df = self._get_open_disk_file(ts=data_ts.internal)
        with df.open():
            self.assertEqual(df.durable_timestamp, data_ts.internal)
        # verify durable timestamp does not change when metadata is written
        meta_ts = self.ts()
        newer_metadata = {'X-Timestamp': meta_ts.internal}
        df.write_metadata(newer_metadata)
        with df.open():
            self.assertEqual(df.durable_timestamp, data_ts.internal)

    def test_durable_timestamp_not_open(self):
        # durable_timestamp must raise DiskFileNotOpen on an unopened diskfile
        unopened = self._simple_get_diskfile()
        self.assertRaises(
            DiskFileNotOpen, getattr, unopened, 'durable_timestamp')

    def test_durable_timestamp_no_data_file(self):
        # durable_timestamp should be None once open() has been attempted
        # against a hash dir whose .data file has gone missing.
        #
        # The timestamp is passed by keyword, like every other caller of
        # _get_open_disk_file in this file, so that it cannot be bound to
        # some other leading positional parameter of the helper.
        df = self._get_open_disk_file(ts=self.ts().internal)
        for f in os.listdir(df._datadir):
            if f.endswith('.data'):
                os.unlink(os.path.join(df._datadir, f))
        df = self._simple_get_diskfile()
        with self.assertRaises(DiskFileNotExist):
            df.open()
        # open() was attempted, but no data file so expect None
        self.assertIsNone(df.durable_timestamp)

    def test_error_in_cleanup_ondisk_files(self):
        # delete() must swallow an OSError raised by cleanup_ondisk_files
        # and still leave its tombstone behind

        def exploding_cleanup(*args, **kwargs):
            raise OSError()

        df = self._get_open_disk_file()
        files_before = len(os.listdir(df._datadir))
        ts = time()
        cleanup_patch = mock.patch(
            self._manager_mock('cleanup_ondisk_files'), exploding_cleanup)
        with cleanup_patch:
            try:
                df.delete(ts)
            except OSError:
                self.fail("OSError raised when it should have been swallowed")
        tombstone = '%s.ts' % str(Timestamp(ts).internal)
        files_after = os.listdir(df._datadir)
        # nothing was cleaned up, so the tombstone is one extra file
        self.assertEqual(len(files_after), files_before + 1)
        self.assertIn(tombstone, set(files_after))

    def _system_can_zero_copy(self):
        # True only when splice reports itself available and
        # utils.get_md5_socket() succeeds without an IOError.
        if splice.available:
            try:
                utils.get_md5_socket()
            except IOError:
                pass
            else:
                return True
        return False

    def test_zero_copy_cache_dropping(self):
        if not self._system_can_zero_copy():
            raise SkipTest("zero-copy support is missing")

        for option, value in (('splice', 'on'),
                              ('keep_cache_size', 16384),
                              ('disk_chunk_size', 4096)):
            self.conf[option] = value

        df = self._get_open_disk_file(fsize=16385)
        reader = df.reader()
        self.assertTrue(reader.can_zero_copy_send())
        drop_patch = mock.patch("swift.obj.diskfile.drop_buffer_cache")
        window_patch = mock.patch(
            "swift.obj.diskfile.DROP_CACHE_WINDOW", 4095)
        with drop_patch as dbc, window_patch:
            with open('/dev/null', 'w') as devnull:
                reader.zero_copy_send(devnull.fileno())
            # expect five cache drops for the 16385 byte send
            self.assertEqual(len(dbc.mock_calls), 5)

    def test_zero_copy_turns_off_when_md5_sockets_not_supported(self):
        if not self._system_can_zero_copy():
            raise SkipTest("zero-copy support is missing")
        df_mgr = self.df_router[POLICIES.default]
        self.conf['splice'] = 'on'
        # every attempt to get an MD5 socket fails with EAFNOSUPPORT
        busted = IOError(errno.EAFNOSUPPORT, "MD5 socket busted")
        with mock.patch('swift.obj.diskfile.get_md5_socket',
                        side_effect=busted):
            df = self._get_open_disk_file(fsize=128)
            reader = df.reader()
            self.assertFalse(reader.can_zero_copy_send())

            # the most recent warning should mention the MD5 sockets
            warnings = df_mgr.logger.get_lines_for_level('warning')
            self.assertIn('MD5 sockets', warnings[-1])

    def test_tee_to_md5_pipe_length_mismatch(self):
        # zero_copy_send() must raise, with a descriptive message, when
        # tee() reports moving fewer bytes than it was asked to move.
        if not self._system_can_zero_copy():
            raise SkipTest("zero-copy support is missing")

        self.conf['splice'] = 'on'

        df = self._get_open_disk_file(fsize=16385)
        reader = df.reader()
        self.assertTrue(reader.can_zero_copy_send())

        # Both fragments are raw strings so that `\d` reaches the regex
        # engine as written rather than relying on an invalid (and, on
        # newer Pythons, warned-about) string escape sequence.
        exc_re = (r'tee\(\) failed: tried to move \d+ bytes, but only '
                  r'moved -?\d+')

        with mock.patch('swift.obj.diskfile.tee') as mock_tee:
            # pretend tee() always moves one byte less than requested
            mock_tee.side_effect = lambda _1, _2, _3, cnt: cnt - 1

            with open('/dev/null', 'w') as devnull:
                try:
                    reader.zero_copy_send(devnull.fileno())
                except Exception as e:
                    self.assertTrue(re.match(exc_re, str(e)))
                else:
                    self.fail('Expected Exception was not raised')

    def test_splice_to_wsockfd_blocks(self):
        # Verify zero_copy_send() copes with the first splice() to the
        # output fd failing with EWOULDBLOCK: it is expected to wait for
        # the fd via trampoline(write=True), splice to it, and reach the
        # reader's close().
        if not self._system_can_zero_copy():
            raise SkipTest("zero-copy support is missing")

        self.conf['splice'] = 'on'

        df = self._get_open_disk_file(fsize=16385)
        reader = df.reader()
        self.assertTrue(reader.can_zero_copy_send())

        # NOTE: devnull, mock_splice, mock_trampoline and mock_close are
        # closure variables bound by the `with` statements at the bottom of
        # this test, so _run_test() may only be called from inside them.
        def _run_test():
            # Set up mock of `splice`
            splice_called = [False]  # State hack

            def fake_splice(fd_in, off_in, fd_out, off_out, len_, flags):
                # fail only the first splice aimed at the output fd; every
                # other call is forwarded to the real splice
                if fd_out == devnull.fileno() and not splice_called[0]:
                    splice_called[0] = True
                    err = errno.EWOULDBLOCK
                    raise IOError(err, os.strerror(err))

                return splice(fd_in, off_in, fd_out, off_out,
                              len_, flags)

            mock_splice.side_effect = fake_splice

            # Set up mock of `trampoline`
            # There are 2 reasons to mock this:
            #
            # - We want to ensure it's called with the expected arguments at
            #   least once
            # - When called with our write FD (which points to `/dev/null`), we
            #   can't actually call `trampoline`, because adding such FD to an
            #   `epoll` handle results in `EPERM`
            #
            # The `timeout` parameter shadows the eventlet `timeout` module
            # inside this function; the `timeout_exc` default is evaluated at
            # definition time, so it still refers to the module's Timeout.
            def fake_trampoline(fd, read=None, write=None, timeout=None,
                                timeout_exc=timeout.Timeout,
                                mark_as_closed=None):
                if write and fd == devnull.fileno():
                    return
                else:
                    hubs.trampoline(fd, read=read, write=write,
                                    timeout=timeout, timeout_exc=timeout_exc,
                                    mark_as_closed=mark_as_closed)

            mock_trampoline.side_effect = fake_trampoline

            reader.zero_copy_send(devnull.fileno())

            # Assert the end of `zero_copy_send` was reached
            self.assertTrue(mock_close.called)
            # Assert there was at least one call to `trampoline` waiting for
            # `write` access to the output FD
            mock_trampoline.assert_any_call(devnull.fileno(), write=True)
            # Assert at least one call to `splice` with the output FD we expect
            for call in mock_splice.call_args_list:
                args = call[0]
                if args[2] == devnull.fileno():
                    break
            else:
                self.fail('`splice` not called with expected arguments')

        with mock.patch('swift.obj.diskfile.splice') as mock_splice:
            # side_effect is the original bound method, so the real close()
            # still runs while the mock records that it was called
            with mock.patch.object(
                    reader, 'close', side_effect=reader.close) as mock_close:
                with open('/dev/null', 'w') as devnull:
                    with mock.patch('swift.obj.diskfile.trampoline') as \
                            mock_trampoline:
                        _run_test()

    def test_create_unlink_cleanup_DiskFileNoSpace(self):
        # Test cleanup when DiskFileNoSpace() is raised.
        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)
        out_of_space = OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))
        fake_fallocate = mock.MagicMock(side_effect=out_of_space)
        fake_unlink = mock.Mock()
        with mock.patch("swift.obj.diskfile.fallocate", fake_fallocate), \
                mock.patch("os.unlink", fake_unlink):
            # the ENOSPC from fallocate must surface as DiskFileNoSpace
            with self.assertRaises(DiskFileNoSpace):
                with df.create(size=100):
                    pass
        self.assertTrue(fake_fallocate.called)
        self.assertTrue(fake_unlink.called)
        self.assertNotIn('error', self.logger.all_log_lines())

    def test_create_unlink_cleanup_renamer_fails(self):
        # Test cleanup when renamer fails
        rename_failure = OSError(errno.ENOENT, os.strerror(errno.ENOENT))
        fake_renamer = mock.MagicMock(side_effect=rename_failure)
        fake_unlink = mock.Mock()
        df = self._simple_get_diskfile()
        data = '0' * 100
        metadata = {
            'ETag': md5(data).hexdigest(),
            'X-Timestamp': Timestamp(time()).internal,
            'Content-Length': str(100),
        }
        with mock.patch("swift.obj.diskfile.renamer", fake_renamer), \
                mock.patch("os.unlink", fake_unlink):
            # the renamer's OSError must propagate out of create()
            with self.assertRaises(OSError):
                with df.create(size=100) as writer:
                    writer.write(data)
                    writer.put(metadata)
        self.assertFalse(writer.put_succeeded)
        self.assertTrue(fake_renamer.called)
        self.assertTrue(fake_unlink.called)
        self.assertNotIn('error', self.logger.all_log_lines())

    def test_create_unlink_cleanup_logging(self):
        # Test logging of os.unlink() failures.
        df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
                                      'xyz', policy=POLICIES.legacy)
        out_of_space = OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))
        missing_file = OSError(errno.ENOENT, os.strerror(errno.ENOENT))
        fake_fallocate = mock.MagicMock(side_effect=out_of_space)
        fake_unlink = mock.MagicMock(side_effect=missing_file)
        with mock.patch("swift.obj.diskfile.fallocate", fake_fallocate), \
                mock.patch("os.unlink", fake_unlink):
            with self.assertRaises(DiskFileNoSpace):
                with df.create(size=100):
                    pass
        self.assertTrue(fake_fallocate.called)
        self.assertTrue(fake_unlink.called)
        # any error that was logged must be about the tempfile removal
        prefix = "Error removing tempfile:"
        self.assertTrue(all(
            line.startswith(prefix)
            for line in self.logger.get_lines_for_level('error')))


@patch_policies(test_policies)
class TestDiskFile(DiskFileMixin, unittest.TestCase):
    """
    Concrete test case that inherits its tests from DiskFileMixin and
    points them at diskfile.DiskFileManager, with storage policies patched
    to test_policies.
    """

    # manager class under test; presumably read by the DiskFileMixin
    # fixtures when they build managers -- confirm against the mixin
    mgr_cls = diskfile.DiskFileManager


@patch_policies(with_ec_default=True)
class TestECDiskFile(DiskFileMixin, unittest.TestCase):

    mgr_cls = diskfile.ECDiskFileManager

    def test_commit_raises_DiskFileErrors(self):
        def _empty_object_metadata(ts):
            # build a brand new metadata dict for every put()
            return {
                'ETag': 'bogus_etag',
                'X-Timestamp': ts.internal,
                'Content-Length': '0',
            }

        errno_to_exception = (
            (errno.ENOSPC, DiskFileNoSpace),
            (errno.EDQUOT, DiskFileNoSpace),
            (errno.ENOTDIR, DiskFileError),
            (errno.EPERM, DiskFileError),
        )

        # Check IOErrors from open() is handled
        for err_number, expected_exception in errno_to_exception:
            io_error = IOError()
            io_error.errno = err_number
            failing_open = mock.MagicMock(side_effect=io_error)
            df = self._simple_get_diskfile(account='a', container='c',
                                           obj='o_%s' % err_number,
                                           policy=POLICIES.default)
            timestamp = Timestamp(time())
            with df.create() as writer:
                writer.put(_empty_object_metadata(timestamp))
                with mock.patch('six.moves.builtins.open', failing_open):
                    with self.assertRaises(expected_exception):
                        writer.commit(timestamp)
            # only one file is expected to be left in the hash dir
            leftovers = os.listdir(df._datadir)
            self.assertEqual(1, len(leftovers), leftovers)
            rmtree(df._datadir)

        # Check OSError from fsync() is handled
        failing_fsync = mock.MagicMock(side_effect=OSError)
        df = self._simple_get_diskfile(account='a', container='c',
                                       obj='o_fsync_error')

        timestamp = Timestamp(time())
        with df.create() as writer:
            writer.put(_empty_object_metadata(timestamp))
            with mock.patch('swift.obj.diskfile.fsync', failing_fsync):
                with self.assertRaises(DiskFileError):
                    writer.commit(timestamp)

    def test_commit_fsync_dir_raises_DiskFileErrors(self):
        def _empty_object_metadata(ts):
            # build a brand new metadata dict for every put()
            return {
                'ETag': 'bogus_etag',
                'X-Timestamp': ts.internal,
                'Content-Length': '0',
            }

        errno_to_exception = (
            (errno.ENOSPC, DiskFileNoSpace),
            (errno.EDQUOT, DiskFileNoSpace),
            (errno.ENOTDIR, DiskFileError),
            (errno.EPERM, DiskFileError),
        )

        # Check IOErrors from fsync_dir() is handled
        for err_number, expected_exception in errno_to_exception:
            io_error = IOError(err_number, os.strerror(err_number))
            failing_open = mock.MagicMock(side_effect=io_error)
            failing_fsync_dir = mock.MagicMock(side_effect=io_error)
            df = self._simple_get_diskfile(account='a', container='c',
                                           obj='o_%s' % err_number,
                                           policy=POLICIES.default)
            timestamp = Timestamp(time())
            with df.create() as writer:
                writer.put(_empty_object_metadata(timestamp))
                # first commit attempt: open() is the thing that fails
                with mock.patch('six.moves.builtins.open', failing_open):
                    with self.assertRaises(expected_exception):
                        writer.commit(timestamp)
                # second commit attempt: fsync_dir() is the thing that fails
                with mock.patch('swift.obj.diskfile.fsync_dir',
                                failing_fsync_dir):
                    with self.assertRaises(expected_exception):
                        writer.commit(timestamp)
            # two files are expected to be left in the hash dir
            leftovers = os.listdir(df._datadir)
            self.assertEqual(2, len(leftovers), leftovers)
            rmtree(df._datadir)

        # Check OSError from fsync_dir() is handled
        failing_fsync_dir = mock.MagicMock(
            side_effect=OSError(100, 'Some Error'))
        df = self._simple_get_diskfile(account='a', container='c',
                                       obj='o_fsync_dir_error')

        timestamp = Timestamp(time())
        with df.create() as writer:
            writer.put(_empty_object_metadata(timestamp))
            with mock.patch('swift.obj.diskfile.fsync_dir',
                            failing_fsync_dir):
                with self.assertRaises(DiskFileError):
                    writer.commit(timestamp)

    def test_data_file_has_frag_index(self):
        policy = POLICIES.default
        sysmeta_key = 'X-Object-Sysmeta-Ec-Frag-Index'
        for good_value in (0, '0', 2, '2', 14, '14'):
            scenarios = (
                # frag_index set by constructor arg
                {'frag_index': good_value},
                # metadata value overrides the constructor arg
                {'frag_index': '99',
                 'extra_metadata': {sysmeta_key: good_value}},
                # metadata value alone is sufficient
                {'frag_index': None,
                 'extra_metadata': {sysmeta_key: good_value}},
            )
            for create_kwargs in scenarios:
                ts = self.ts().internal
                expected = ['%s#%s.data' % (ts, good_value),
                            '%s.durable' % ts]
                df = self._get_open_disk_file(ts=ts, policy=policy,
                                              **create_kwargs)
                self.assertEqual(expected, sorted(os.listdir(df._datadir)))
                # frag index should be added to object sysmeta
                actual = df.get_metadata().get(sysmeta_key)
                self.assertEqual(int(good_value), int(actual))

    def test_sysmeta_frag_index_is_immutable(self):
        # the X-Object-Sysmeta-Ec-Frag-Index should *only* be set when
        # the .data file is written.
        policy = POLICIES.default
        orig_frag_index = 14
        sysmeta_key = 'X-Object-Sysmeta-Ec-Frag-Index'

        def _reopen():
            # open the same diskfile with no frag_index passed to constructor
            fresh = self.df_router[policy].get_diskfile(
                self.existing_device, 0, 'a', 'c', 'my_obj', policy=policy,
                frag_index=None)
            fresh.open()
            return fresh

        def _assert_frag_index_unchanged(opened):
            actual = opened.get_metadata().get(sysmeta_key)
            self.assertEqual(int(orig_frag_index), int(actual))

        # frag_index set by constructor arg
        ts = self.ts().internal
        expected = ['%s#%s.data' % (ts, orig_frag_index), '%s.durable' % ts]
        df = self._get_open_disk_file(ts=ts, policy=policy, obj_name='my_obj',
                                      frag_index=orig_frag_index)
        self.assertEqual(expected, sorted(os.listdir(df._datadir)))
        # frag index should be added to object sysmeta
        _assert_frag_index_unchanged(df)

        df = _reopen()
        _assert_frag_index_unchanged(df)

        # write metadata to a meta file
        ts = self.ts().internal
        df.write_metadata({'X-Timestamp': ts,
                           'X-Object-Meta-Fruit': 'kiwi'})
        # sanity check we did write a meta file
        expected.append('%s.meta' % ts)
        self.assertEqual(expected, sorted(os.listdir(df._datadir)))

        # open the same diskfile, check frag index is unchanged
        df = _reopen()
        # sanity check we have read the meta file
        self.assertEqual(ts, df.get_metadata().get('X-Timestamp'))
        self.assertEqual('kiwi', df.get_metadata().get('X-Object-Meta-Fruit'))
        # check frag index sysmeta is unchanged
        _assert_frag_index_unchanged(df)

        # attempt to overwrite frag index sysmeta
        ts = self.ts().internal
        df.write_metadata({'X-Timestamp': ts,
                           sysmeta_key: 99,
                           'X-Object-Meta-Fruit': 'apple'})

        # open the same diskfile, check frag index is unchanged
        df = _reopen()
        # sanity check we have read the meta file
        self.assertEqual(ts, df.get_metadata().get('X-Timestamp'))
        self.assertEqual('apple', df.get_metadata().get('X-Object-Meta-Fruit'))
        _assert_frag_index_unchanged(df)

    def test_data_file_errors_bad_frag_index(self):
        policy = POLICIES.default
        df_mgr = self.df_router[policy]
        for bad_value in ('foo', '-2', -2, '3.14', 3.14):
            # check that bad frag_index set by constructor arg raises error
            # as soon as diskfile is constructed, before data is written
            with self.assertRaises(DiskFileError):
                self._simple_get_diskfile(policy=policy, frag_index=bad_value)

            # bad frag_index set by metadata value, first with no
            # constructor arg (drive-by check that it is ok for constructor
            # arg to be None), then overriding an ok constructor arg
            for constructor_frag_index in (None, 2):
                df = df_mgr.get_diskfile(
                    self.existing_device, '0', 'a', 'c', 'o',
                    policy=policy, frag_index=constructor_frag_index)
                ts = self.ts()
                meta = {'X-Object-Sysmeta-Ec-Frag-Index': bad_value,
                        'X-Timestamp': ts.internal,
                        'Content-Length': 0,
                        'Etag': EMPTY_ETAG,
                        'Content-Type': 'plain/text'}
                with df.create() as writer:
                    try:
                        writer.put(meta)
                    except DiskFileError:
                        pass
                    else:
                        self.fail('Expected DiskFileError for frag_index %s'
                                  % bad_value)

    def test_purge_one_fragment_index(self):
        # two fragment archives share one timestamp; purging one of them
        # must leave the other and the .durable file in place
        ts = self.ts()
        body = 'test data'
        for frag_index in (1, 2):
            df = self._simple_get_diskfile(frag_index=frag_index)
            with df.create() as writer:
                writer.write(body)
                writer.put({
                    'ETag': md5(body).hexdigest(),
                    'X-Timestamp': ts.internal,
                    'Content-Length': len(body),
                })
                writer.commit(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        first_frag = ts.internal + '#1.data'
        second_frag = ts.internal + '#2.data'
        durable = ts.internal + '.durable'

        # sanity
        self.assertEqual(on_disk(), [first_frag, second_frag, durable])
        df.purge(ts, 2)
        self.assertEqual(on_disk(), [first_frag, durable])

    def test_purge_last_fragment_index(self):
        # purging the only fragment archive leaves just the .durable file
        ts = self.ts()
        body = 'test data'
        df = self._simple_get_diskfile(frag_index=0)
        with df.create() as writer:
            writer.write(body)
            writer.put({
                'ETag': md5(body).hexdigest(),
                'X-Timestamp': ts.internal,
                'Content-Length': len(body),
            })
            writer.commit(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        durable = ts.internal + '.durable'

        # sanity
        self.assertEqual(on_disk(), [ts.internal + '#0.data', durable])
        df.purge(ts, 0)
        self.assertEqual(on_disk(), [durable])

    def test_purge_non_existent_fragment_index(self):
        # purging a fragment index that is not on disk changes nothing
        ts = self.ts()
        body = 'test data'
        df = self._simple_get_diskfile(frag_index=7)
        with df.create() as writer:
            writer.write(body)
            writer.put({
                'ETag': md5(body).hexdigest(),
                'X-Timestamp': ts.internal,
                'Content-Length': len(body),
            })
            writer.commit(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        untouched = [ts.internal + '#7.data', ts.internal + '.durable']

        # sanity
        self.assertEqual(on_disk(), untouched)
        df.purge(ts, 3)
        # no effect
        self.assertEqual(on_disk(), untouched)

    def test_purge_old_timestamp_frag_index(self):
        # purging with a timestamp older than the one on disk changes nothing
        old_ts = self.ts()
        ts = self.ts()
        body = 'test data'
        df = self._simple_get_diskfile(frag_index=1)
        with df.create() as writer:
            writer.write(body)
            writer.put({
                'ETag': md5(body).hexdigest(),
                'X-Timestamp': ts.internal,
                'Content-Length': len(body),
            })
            writer.commit(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        untouched = [ts.internal + '#1.data', ts.internal + '.durable']

        # sanity
        self.assertEqual(on_disk(), untouched)
        df.purge(old_ts, 1)
        # no effect
        self.assertEqual(on_disk(), untouched)

    def test_purge_tombstone(self):
        # purging at the tombstone's timestamp removes the tombstone
        ts = self.ts()
        df = self._simple_get_diskfile(frag_index=3)
        df.delete(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        # sanity
        self.assertEqual(on_disk(), [ts.internal + '.ts'])
        df.purge(ts, 3)
        self.assertEqual(on_disk(), [])

    def test_purge_without_frag(self):
        # a tombstone can be purged with no fragment index at all
        ts = self.ts()
        df = self._simple_get_diskfile()
        df.delete(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        # sanity
        self.assertEqual(on_disk(), [ts.internal + '.ts'])
        df.purge(ts, None)
        self.assertEqual(on_disk(), [])

    def test_purge_old_tombstone(self):
        # purging with a timestamp older than the tombstone changes nothing
        old_ts = self.ts()
        ts = self.ts()
        df = self._simple_get_diskfile(frag_index=5)
        df.delete(ts)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        untouched = [ts.internal + '.ts']

        # sanity
        self.assertEqual(on_disk(), untouched)
        df.purge(old_ts, 5)
        # no effect
        self.assertEqual(on_disk(), untouched)

    def test_purge_already_removed(self):
        df = self._simple_get_diskfile(frag_index=6)

        # purge before the datadir has been created: no errors
        df.purge(self.ts(), 6)

        def on_disk():
            return sorted(os.listdir(df._datadir))

        # sanity
        os.makedirs(df._datadir)
        self.assertEqual(on_disk(), [])
        # purge against an empty datadir
        df.purge(self.ts(), 6)
        # no effect
        self.assertEqual(on_disk(), [])

    def test_open_most_recent_durable(self):
        policy = POLICIES.default
        df_mgr = self.df_router[policy]

        def _new_diskfile():
            # a fresh diskfile object for a/c/o on every call
            return df_mgr.get_diskfile(self.existing_device, '0',
                                       'a', 'c', 'o', policy=policy)

        body = 'test data'

        # write a durable data file
        ts = self.ts()
        with _new_diskfile().create() as writer:
            writer.write(body)
            metadata = {
                'ETag': md5(body).hexdigest(),
                'X-Timestamp': ts.internal,
                'Content-Length': len(body),
                'X-Object-Sysmeta-Ec-Frag-Index': 3,
            }
            writer.put(metadata)
            writer.commit(ts)

        # add some .meta stuff
        extra_meta = {
            'X-Object-Meta-Foo': 'Bar',
            'X-Timestamp': self.ts().internal,
        }
        _new_diskfile().write_metadata(extra_meta)

        # sanity
        df = _new_diskfile()
        metadata.update(extra_meta)
        self.assertEqual(metadata, df.read_metadata())

        # add a newer datafile
        df = _new_diskfile()
        ts = self.ts()
        with df.create() as writer:
            writer.write(body)
            new_metadata = {
                'ETag': md5(body).hexdigest(),
                'X-Timestamp': ts.internal,
                'Content-Length': len(body),
                'X-Object-Sysmeta-Ec-Frag-Index': 3,
            }
            writer.put(new_metadata)
            # N.B. don't make it durable

        # and we still get the old metadata (same as if no .data!)
        self.assertEqual(metadata, _new_diskfile().read_metadata())

    def test_open_most_recent_missing_durable(self):
        policy = POLICIES.default
        df_mgr = self.df_router[policy]

        def _new_diskfile(**extra_kwargs):
            # a fresh diskfile object for a/c/o on every call
            return df_mgr.get_diskfile(self.existing_device, '0',
                                       'a', 'c', 'o', policy=policy,
                                       **extra_kwargs)

        df = _new_diskfile()

        # nothing has been written yet
        with self.assertRaises(DiskFileNotExist):
            df.read_metadata()

        # now create a datafile missing durable
        ts = self.ts()
        body = 'test data'
        with df.create() as writer:
            writer.write(body)
            new_metadata = {
                'ETag': md5(body).hexdigest(),
                'X-Timestamp': ts.internal,
                'Content-Length': len(body),
                'X-Object-Sysmeta-Ec-Frag-Index': 3,
            }
            writer.put(new_metadata)
            # N.B. don't make it durable

        # add some .meta stuff
        extra_meta = {
            'X-Object-Meta-Foo': 'Bar',
            'X-Timestamp': self.ts().internal,
        }
        _new_diskfile().write_metadata(extra_meta)

        # we still get the DiskFileNotExist (same as if no .data!)
        with self.assertRaises(DiskFileNotExist):
            _new_diskfile(frag_index=3).read_metadata()

        # sanity, without the frag_index kwarg
        with self.assertRaises(DiskFileNotExist):
            _new_diskfile().read_metadata()

    def test_fragments(self):
        ts_1 = self.ts()
        self._get_open_disk_file(ts=ts_1.internal, frag_index=0)
        df = self._get_open_disk_file(ts=ts_1.internal, frag_index=2)
        self.assertEqual(df.fragments, {ts_1: [0, 2]})

        # now add a newer datafile for frag index 3 but don't write a
        # durable with it (so ignore the error when we try to open)
        ts_2 = self.ts()
        try:
            df = self._get_open_disk_file(ts=ts_2.internal, frag_index=3,
                                          commit=False)
        except DiskFileNotExist:
            pass

        all_frags = {ts_1: [0, 2], ts_2: [3]}

        # sanity check: should have 2* .data, .durable, .data
        files = os.listdir(df._datadir)
        self.assertEqual(4, len(files))
        with df.open():
            self.assertEqual(df.fragments, all_frags)

        # verify frags available even if open fails e.g. if .durable missing
        for filename in files:
            if filename.endswith('.durable'):
                os.remove(os.path.join(df._datadir, filename))

        with self.assertRaises(DiskFileNotExist):
            df.open()
        self.assertEqual(df.fragments, all_frags)

    def test_fragments_not_open(self):
        # fragments is None on a diskfile that has not been opened
        unopened = self._simple_get_diskfile()
        reported = unopened.fragments
        self.assertIsNone(reported)

    def test_durable_timestamp_no_durable_file(self):
        # durable_timestamp should be None once open() has been attempted
        # against a data file that was never committed.
        #
        # The timestamp is passed by keyword, like every other caller of
        # _get_open_disk_file in this file, so that it cannot be bound to
        # some other leading positional parameter of the helper.
        try:
            self._get_open_disk_file(ts=self.ts().internal, commit=False)
        except DiskFileNotExist:
            # expected: the uncommitted object cannot be opened
            pass
        df = self._simple_get_diskfile()
        with self.assertRaises(DiskFileNotExist):
            df.open()
        # open() was attempted, but no durable file so expect None
        self.assertIsNone(df.durable_timestamp)

    def test_durable_timestamp_missing_frag_index(self):
        # write frag index 1, then ask for frag index 2
        written_ts = self.ts()
        self._get_open_disk_file(ts=written_ts.internal, frag_index=1)
        other_frag = self._simple_get_diskfile(frag_index=2)
        self.assertRaises(DiskFileNotExist, other_frag.open)
        # open() was attempted, but no data file for frag index so expect None
        self.assertIsNone(other_frag.durable_timestamp)

    def test_durable_timestamp_newer_non_durable_data_file(self):
        # a committed data file at the older timestamp ...
        durable_ts = self.ts()
        self._get_open_disk_file(ts=durable_ts.internal)
        # ... followed by a newer data file that is never committed
        newer_ts = self.ts()
        try:
            self._get_open_disk_file(ts=newer_ts.internal, commit=False)
        except DiskFileNotExist:
            pass
        df = self._simple_get_diskfile()
        # sanity check - one .durable file, two .data files
        on_disk = os.listdir(df._datadir)
        self.assertEqual(3, len(on_disk))
        df.open()
        self.assertEqual(durable_ts, df.durable_timestamp)


@patch_policies(with_ec_default=True)
class TestSuffixHashes(unittest.TestCase):
    """
    This tests all things related to hashing suffixes, and therefore
    there are also a few test methods for cleanup_ondisk_files
    (because it's used by hash_suffix).

    The public interface to suffix hashing is on the Manager::

         * cleanup_ondisk_files(hsh_path)
         * get_hashes(device, partition, suffixes, policy)
         * invalidate_hash(suffix_dir)

    The Manager.get_hashes method (used by the REPLICATE verb)
    calls Manager._get_hashes (which may be an alias to the module
    method get_hashes), which calls hash_suffix, which calls
    cleanup_ondisk_files.

    Outside of that, cleanup_ondisk_files and invalidate_hash are
    used mostly after writing new files via PUT or DELETE.

    Test methods are organized by::

        * cleanup_ondisk_files tests - behaviors
        * cleanup_ondisk_files tests - error handling
        * invalidate_hash tests - behavior
        * invalidate_hash tests - error handling
        * get_hashes tests - hash_suffix behaviors
        * get_hashes tests - hash_suffix error handling
        * get_hashes tests - behaviors
        * get_hashes tests - error handling

    """

    def setUp(self):
        """
        Build a scratch devices tree holding a single device, and a
        DiskFileRouter configured to use it.
        """
        self.policy = None
        self.logger = debug_logger('suffix-hash-test')
        self.existing_device = 'sda1'
        # on-disk layout is <testdir>/node/sda1
        self.testdir = tempfile.mkdtemp()
        self.devices = os.path.join(self.testdir, 'node')
        for path in (self.devices,
                     os.path.join(self.devices, self.existing_device)):
            os.mkdir(path)
        self.conf = dict(swift_dir=self.testdir, devices=self.devices,
                         mount_check=False)
        self.df_router = diskfile.DiskFileRouter(self.conf, self.logger)
        # endless supply of increasing timestamps, starting at the current
        # whole second
        self._ts_iter = (Timestamp(t) for t in
                         itertools.count(int(time())))

    def ts(self):
        """
        Return the next timestamp from this test's never-ending supply.
        """
        timestamp_source = self._ts_iter
        return next(timestamp_source)

    def fname_to_ts_hash(self, fname):
        """
        EC datafiles are only hashed by their timestamp

        :param fname: name of an EC data file, e.g. ``<timestamp>#<fi>.data``
        :returns: hex md5 digest of the part of ``fname`` that precedes the
                  first ``#``
        """
        ts_part = fname.split('#', 1)[0]
        if not isinstance(ts_part, bytes):
            # hashlib only accepts bytes; a py2 native str already is bytes
            # so this only kicks in for text strings
            ts_part = ts_part.encode('utf-8')
        return md5(ts_part).hexdigest()

    def tearDown(self):
        """
        Remove the test's scratch directory, ignoring any errors hit while
        doing so.
        """
        rmtree(self.testdir, ignore_errors=True)

    def iter_policies(self):
        """
        Yield each policy in POLICIES in turn, recording the one being
        handed out on ``self.policy`` first.
        """
        for current in POLICIES:
            self.policy = current
            yield current

    def assertEqual(self, *args, **kwargs):
        """
        Same as unittest's assertEqual, except that when ``self.policy`` is
        set the failure message is suffixed with that policy.

        Accepts the same positional and keyword arguments (e.g. ``msg``) as
        ``unittest.TestCase.assertEqual``.

        :raises AssertionError: if the compared values differ
        """
        try:
            unittest.TestCase.assertEqual(self, *args, **kwargs)
        except AssertionError as err:
            if not self.policy:
                # no policy to report, leave the failure untouched
                raise
            policy_trailer = '\n\n... for policy %r' % self.policy
            raise AssertionError(str(err) + policy_trailer)

    def _datafilename(self, timestamp, policy, frag_index=None):
        """
        Build the name of a .data file for the given timestamp.

        For EC policies the name carries a ``#<frag_index>`` marker; when no
        frag index is given a random one in the range 0-9 is picked.
        """
        if frag_index is None:
            frag_index = randint(0, 9)
        parts = [timestamp.internal]
        if policy.policy_type == EC_POLICY:
            parts.append('#%d' % frag_index)
        parts.append('.data')
        return ''.join(parts)

    def _metafilename(self, meta_timestamp, ctype_timestamp=None):
        filename = meta_timestamp.internal
        if ctype_timestamp is not None:
            delta = meta_timestamp.raw - ctype_timestamp.raw
            filename = '%s-%x' % (filename, delta)
        filename += '.meta'
        return filename

    def check_cleanup_ondisk_files(self, policy, input_files, output_files):
        """
        Run the policy's cleanup_ondisk_files against a simulated directory
        listing and verify the outcome.

        :param policy: storage policy whose diskfile manager is exercised
        :param input_files: names initially reported by the fake listdir
        :param output_files: the expected value of the result's 'files'
                             entry, or an Exception instance whose class is
                             expected to be raised instead
        """
        # grab the real unlink before os.unlink gets patched below
        real_unlink = os.unlink
        remaining = list(input_files)

        def fake_listdir(path):
            # hand out a copy of the simulated directory contents
            return remaining[:]

        def fake_unlink(path):
            # timestamp 1 is a special tag to pretend a file disappeared
            # between the listdir and unlink.
            if '/0000000001.00000.' in path:
                # Using actual os.unlink for a non-existent name to reproduce
                # exactly what OSError it raises in order to prove that
                # common.utils.remove_file is squelching the error - but any
                # OSError would do.
                real_unlink(uuid.uuid4().hex)
            remaining.remove(os.path.basename(path))

        patches = {'os.listdir': fake_listdir, 'os.unlink': fake_unlink}
        df_mgr = self.df_router[policy]
        with unit_mock(patches):
            if not isinstance(output_files, Exception):
                files = df_mgr.cleanup_ondisk_files('/whatever')['files']
                self.assertEqual(files, output_files)
                return
            path = os.path.join(self.testdir, 'does-not-matter')
            self.assertRaises(output_files.__class__,
                              df_mgr.cleanup_ondisk_files, path)

    # cleanup_ondisk_files tests - behaviors

    def test_cleanup_ondisk_files_purge_data_newer_ts(self):
        # purge .data if there's a newer .ts
        for policy in self.iter_policies():
            older_data = self._datafilename(self.ts(), policy)
            newer_tombstone = self.ts().internal + '.ts'
            self.check_cleanup_ondisk_files(
                policy, [older_data, newer_tombstone], [newer_tombstone])

    def test_cleanup_ondisk_files_purge_expired_ts(self):
        for policy in self.iter_policies():
            # purge older .ts files if there's a newer .data
            old_ts, new_ts = [self.ts().internal + '.ts' for _ in range(2)]
            data = self._datafilename(self.ts(), policy)
            expected_by_type = {
                # no durable datafile means you can't get rid of the
                # latest tombstone even if datafile is newer
                EC_POLICY: [data, new_ts],
                REPL_POLICY: [data],
            }
            self.check_cleanup_ondisk_files(
                policy, [old_ts, new_ts, data],
                expected_by_type[policy.policy_type])

    def test_cleanup_ondisk_files_purge_ts_newer_data(self):
        for policy in self.iter_policies():
            # purge .ts if there's a newer .data
            file1 = self.ts().internal + '.ts'
            timestamp = self.ts()
            file2 = self._datafilename(timestamp, policy)
            file_list = [file1, file2]
            # Name the .durable for every policy type: the mapping below is
            # built eagerly, so binding this only for EC would make the test
            # depend on an EC policy having been iterated first.
            durable_file = timestamp.internal + '.durable'
            if policy.policy_type == EC_POLICY:
                file_list.append(durable_file)
            expected = {
                EC_POLICY: [durable_file, file2],
                REPL_POLICY: [file2],
            }[policy.policy_type]
            self.check_cleanup_ondisk_files(policy, file_list, expected)

    def test_cleanup_ondisk_files_purge_older_ts(self):
        for policy in self.iter_policies():
            older_ts = self.ts().internal + '.ts'
            newer_ts = self.ts().internal + '.ts'
            data = self._datafilename(self.ts(), policy)
            meta = self.ts().internal + '.meta'
            survivors = {
                # no durable means we can only throw out things before
                # the latest tombstone
                EC_POLICY: [meta, data, newer_ts],
                # keep .meta and .data and purge all .ts files
                REPL_POLICY: [meta, data],
            }
            self.check_cleanup_ondisk_files(
                policy, [older_ts, newer_ts, data, meta],
                survivors[policy.policy_type])

    def test_cleanup_ondisk_files_keep_meta_data_purge_ts(self):
        for policy in self.iter_policies():
            file1 = self.ts().internal + '.ts'
            file2 = self.ts().internal + '.ts'
            timestamp = self.ts()
            file3 = self._datafilename(timestamp, policy)
            file_list = [file1, file2, file3]
            # Name the .durable for every policy type: the mapping below is
            # built eagerly, so binding this only for EC would make the test
            # depend on an EC policy having been iterated first.
            durable_filename = timestamp.internal + '.durable'
            if policy.policy_type == EC_POLICY:
                file_list.append(durable_filename)
            file4 = self.ts().internal + '.meta'
            file_list.append(file4)
            # keep .meta and .data if meta newer than data and purge .ts
            expected = {
                EC_POLICY: [file4, durable_filename, file3],
                REPL_POLICY: [file4, file3],
            }[policy.policy_type]
            self.check_cleanup_ondisk_files(policy, file_list, expected)

    def test_cleanup_ondisk_files_keep_one_ts(self):
        # keep only latest of multiple .ts files
        for policy in self.iter_policies():
            tombstones = [self.ts().internal + '.ts' for _ in range(3)]
            self.check_cleanup_ondisk_files(
                policy, tombstones, tombstones[-1:])

    def test_cleanup_ondisk_files_multi_data_file(self):
        for policy in self.iter_policies():
            oldest, middle, newest = [
                self._datafilename(self.ts(), policy, frag_index)
                for frag_index in (1, 2, 3)]
            expected_by_type = {
                # keep all non-durable datafiles
                EC_POLICY: [newest, middle, oldest],
                # keep only latest of multiple .data files
                REPL_POLICY: [newest],
            }
            self.check_cleanup_ondisk_files(
                policy, [oldest, middle, newest],
                expected_by_type[policy.policy_type])

    def test_cleanup_ondisk_files_keeps_one_datafile(self):
        for policy in self.iter_policies():
            timestamps = [self.ts() for i in range(3)]
            file1 = self._datafilename(timestamps[0], policy, 1)
            file2 = self._datafilename(timestamps[1], policy, 2)
            file3 = self._datafilename(timestamps[2], policy, 3)
            file_list = [file1, file2, file3]
            # Name the newest .durable for every policy type: the mapping
            # below is built eagerly, so binding this only for EC would make
            # the test depend on an EC policy having been iterated first.
            latest_durable = timestamps[-1].internal + '.durable'
            if policy.policy_type == EC_POLICY:
                # every data file gets a matching .durable
                file_list.extend(t.internal + '.durable' for t in timestamps)
            expected = {
                # keep latest durable and datafile
                EC_POLICY: [latest_durable, file3],
                # keep only latest of multiple .data files
                REPL_POLICY: [file3]
            }[policy.policy_type]
            self.check_cleanup_ondisk_files(policy, file_list, expected)

    def test_cleanup_ondisk_files_keep_one_meta(self):
        for policy in self.iter_policies():
            # keep only latest of multiple .meta files
            t_data = self.ts()
            file1 = self._datafilename(t_data, policy)
            file2, file3 = [self.ts().internal + '.meta' for i in range(2)]
            file_list = [file1, file2, file3]
            # Name the .durable for every policy type: the mapping below is
            # built eagerly, so binding this only for EC would make the test
            # depend on an EC policy having been iterated first.
            durable_file = t_data.internal + '.durable'
            if policy.policy_type == EC_POLICY:
                file_list.append(durable_file)
            expected = {
                EC_POLICY: [file3, durable_file, file1],
                REPL_POLICY: [file3, file1]
            }[policy.policy_type]
            self.check_cleanup_ondisk_files(policy, file_list, expected)

    def test_cleanup_ondisk_files_only_meta(self):
        # with nothing but .meta files only the latest one is expected back
        for policy in self.iter_policies():
            metas = [self.ts().internal + '.meta' for _ in range(2)]
            self.check_cleanup_ondisk_files(policy, metas, metas[-1:])

    def test_cleanup_ondisk_files_ignore_orphaned_ts(self):
        for policy in self.iter_policies():
            # A more recent orphaned .meta file will prevent old .ts files
            # from being cleaned up otherwise
            older_ts = self.ts().internal + '.ts'
            newer_ts = self.ts().internal + '.ts'
            meta = self.ts().internal + '.meta'
            self.check_cleanup_ondisk_files(
                policy, [older_ts, newer_ts, meta], [meta, newer_ts])

    def test_cleanup_ondisk_files_purge_old_data_only(self):
        for policy in self.iter_policies():
            # Oldest .data will be purged, .meta and .ts won't be touched
            data = self._datafilename(self.ts(), policy)
            tombstone = self.ts().internal + '.ts'
            meta = self.ts().internal + '.meta'
            self.check_cleanup_ondisk_files(
                policy, [data, tombstone, meta], [meta, tombstone])

    def test_cleanup_ondisk_files_purge_old_ts(self):
        for policy in self.iter_policies():
            # A single old .ts file will be removed
            old_ts = Timestamp(time() - (diskfile.ONE_WEEK + 1))
            self.check_cleanup_ondisk_files(
                policy, [old_ts.internal + '.ts'], [])

    def test_cleanup_ondisk_files_keep_isolated_meta_purge_old_ts(self):
        for policy in self.iter_policies():
            # A single old .ts file will be removed despite presence of a .meta
            old_ts = Timestamp(time() - (diskfile.ONE_WEEK + 1))
            old_tombstone = old_ts.internal + '.ts'
            recent_meta = Timestamp(time() + 2).internal + '.meta'
            self.check_cleanup_ondisk_files(
                policy, [old_tombstone, recent_meta], [recent_meta])

    def test_cleanup_ondisk_files_keep_single_old_data(self):
        for policy in self.iter_policies():
            old_ts = Timestamp(time() - (diskfile.ONE_WEEK + 1))
            lone_data = [self._datafilename(old_ts, policy)]
            # for EC an isolated old .data file is removed, it's useless
            # without a .durable; otherwise a single old .data file will
            # not be removed
            is_ec = policy.policy_type == EC_POLICY
            expected = [] if is_ec else lone_data
            self.check_cleanup_ondisk_files(policy, lone_data, expected)

    def test_cleanup_ondisk_files_drops_isolated_durable(self):
        # only exercised for EC policies
        for policy in self.iter_policies():
            if policy.policy_type != EC_POLICY:
                continue
            lone_durable = Timestamp(time()).internal + '.durable'
            self.check_cleanup_ondisk_files(policy, [lone_durable], [])

    def test_cleanup_ondisk_files_purges_single_old_meta(self):
        for policy in self.iter_policies():
            # A single old .meta file will be removed
            old_ts = Timestamp(time() - (diskfile.ONE_WEEK + 1))
            self.check_cleanup_ondisk_files(
                policy, [old_ts.internal + '.meta'], [])

    # cleanup_ondisk_files tests - error handling

    def test_cleanup_ondisk_files_hsh_path_enoent(self):
        # common.utils.listdir *completely* mutes ENOENT
        missing = os.path.join(self.testdir, 'does-not-exist')
        for policy in self.iter_policies():
            result = self.df_router[policy].cleanup_ondisk_files(missing)
            self.assertEqual(result['files'], [])

    def test_cleanup_ondisk_files_hsh_path_other_oserror(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # an OSError from listdir that is not ENOENT reaches the caller
            with mock.patch('os.listdir', side_effect=OSError('kaboom!')):
                path = os.path.join(self.testdir, 'does-not-matter')
                self.assertRaises(OSError, df_mgr.cleanup_ondisk_files,
                                  path)

    def test_cleanup_ondisk_files_reclaim_tombstone_remove_file_error(self):
        # Timestamp 1 makes the check routine pretend the file
        # disappeared after listdir before unlink.
        vanishing = ['0000000001.00000.ts']
        for policy in self.iter_policies():
            self.check_cleanup_ondisk_files(policy, vanishing, [])

    def test_cleanup_ondisk_files_older_remove_file_error(self):
        for policy in self.iter_policies():
            # Timestamp 1 makes the check routine pretend the file
            # disappeared after listdir before unlink.
            vanishing_data = self._datafilename(Timestamp(1), policy)
            tombstone = '0000000002.00000.ts'
            self.check_cleanup_ondisk_files(
                policy, [vanishing_data, tombstone], [])

    # invalidate_hash tests - behavior

    def test_invalidate_hash_file_does_not_exist(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                     policy=policy)
            part_path = os.path.join(self.devices, 'sda1',
                                     diskfile.get_data_dir(policy), '0')
            hashes_file, inv_file = [
                os.path.join(part_path, name)
                for name in (diskfile.HASH_FILE,
                             diskfile.HASH_INVALIDATIONS_FILE)]
            self.assertFalse(os.path.exists(hashes_file))  # sanity
            with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
                df_mgr.invalidate_hash(os.path.dirname(df._datadir))
            # no hashes file means no lock is taken ...
            self.assertFalse(mock_lock.called)
            # ... and it does not create files
            for path in (hashes_file, inv_file):
                self.assertFalse(os.path.exists(path))

    def test_invalidate_hash_file_exists(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # create something to hash
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                     policy=policy)
            df.delete(self.ts())
            suffix_dir = os.path.dirname(df._datadir)
            suffix = os.path.basename(suffix_dir)
            part_path = os.path.join(self.devices, 'sda1',
                                     diskfile.get_data_dir(policy), '0')
            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
            invalidations_file = os.path.join(
                part_path, diskfile.HASH_INVALIDATIONS_FILE)

            def load_hashes_file():
                with open(hashes_file, 'rb') as fp:
                    return pickle.load(fp)

            def read_invalidations_file():
                with open(invalidations_file, 'rb') as fp:
                    return fp.read()

            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertIn(suffix, hashes)  # sanity
            # sanity check hashes file
            self.assertEqual(hashes, load_hashes_file())

            # invalidate the hash
            with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
                df_mgr.invalidate_hash(suffix_dir)
            self.assertTrue(mock_lock.called)
            self.assertEqual(suffix + "\n", read_invalidations_file())

            # consolidate the hash and the invalidations
            with mock.patch('swift.obj.diskfile.lock_path'):
                hashes = df_mgr.consolidate_hashes(part_path)
            self.assertIsNone(hashes.get(suffix))

            # the hashes file is rewritten and the invalidations emptied
            self.assertEqual(hashes, load_hashes_file())
            self.assertEqual("", read_invalidations_file())

    # invalidate_hash tests - error handling

    def test_invalidate_hash_bad_pickle(self):
        garbage = 'asdf'
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # make some valid data
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                     policy=policy)
            suffix_dir = os.path.dirname(df._datadir)
            df.delete(self.ts())
            # sanity check hashes file
            hashes_file = os.path.join(
                self.devices, 'sda1', diskfile.get_data_dir(policy), '0',
                diskfile.HASH_FILE)
            self.assertFalse(os.path.exists(hashes_file))
            # write some garbage in hashes file
            with open(hashes_file, 'w') as fp:
                fp.write(garbage)
            # invalidate_hash silently does *NOT* repair invalid data
            df_mgr.invalidate_hash(suffix_dir)
            with open(hashes_file) as fp:
                contents = fp.read()
            self.assertEqual(contents, garbage)
            # ... but get_hashes will
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertIn(os.path.basename(suffix_dir), hashes)

    # get_hashes tests - hash_suffix behaviors

    def test_hash_suffix_one_tombstone(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(
                'sda1', '0', 'a', 'c', 'o', policy=policy)
            # write a tombstone
            ts_delete = self.ts()
            df.delete(ts_delete)
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)

            suffix = os.path.basename(os.path.dirname(df._datadir))
            tombstone_hash = md5(ts_delete.internal + '.ts').hexdigest()
            expected_by_type = {
                REPL_POLICY: {suffix: tombstone_hash},
                # fi is None here because we have a tombstone
                EC_POLICY: {suffix: {None: tombstone_hash}},
            }
            self.assertEqual(hashes, expected_by_type[policy.policy_type])

    def test_hash_suffix_one_tombstone_and_one_meta(self):
        # A tombstone plus a newer meta file can happen if a tombstone is
        # replicated to a node with a newer meta file but older data file. The
        # meta file will be ignored when the diskfile is opened so the
        # effective state of the disk files is equivalent to only having the
        # tombstone. Replication cannot remove the meta file, and the meta file
        # cannot be ssync replicated to a node with only the tombstone, so
        # we want the get_hashes result to be the same as if the meta file was
        # not there.
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(
                'sda1', '0', 'a', 'c', 'o', policy=policy)
            # write a tombstone
            ts_delete = self.ts()
            df.delete(ts_delete)
            # write a meta file
            ts_meta = self.ts()
            df.write_metadata({'X-Timestamp': ts_meta.internal})
            # sanity check
            on_disk = os.listdir(df._datadir)
            self.assertEqual(2, len(on_disk))
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)

            # only the tombstone is expected to contribute to the hash
            suffix = os.path.basename(os.path.dirname(df._datadir))
            tombstone_hash = md5(ts_delete.internal + '.ts').hexdigest()
            expected_by_type = {
                REPL_POLICY: {suffix: tombstone_hash},
                # fi is None here because we have a tombstone
                EC_POLICY: {suffix: {None: tombstone_hash}},
            }
            self.assertEqual(hashes, expected_by_type[policy.policy_type])

    def test_hash_suffix_one_reclaim_tombstone_and_one_meta(self):
        # An isolated meta file can happen if a tombstone is replicated to a
        # node with a newer meta file but older data file, and the tombstone is
        # subsequently reclaimed. The meta file will be ignored when the
        # diskfile is opened so the effective state of the disk files is
        # equivalent to having no files.
        for policy in self.iter_policies():
            if policy.policy_type == EC_POLICY:
                continue
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(
                'sda1', '0', 'a', 'c', 'o', policy=policy)
            suffix = os.path.basename(os.path.dirname(df._datadir))
            now = time()
            # scale back the df manager's reclaim age a bit
            df_mgr.reclaim_age = 1000
            # write a tombstone that's older than the reclaim age
            df.delete(Timestamp(now - 10001))
            # write a meta file that's not quite so old
            ts_meta = Timestamp(now - 501)
            df.write_metadata({'X-Timestamp': ts_meta.internal})
            # sanity check
            self.assertEqual(2, len(os.listdir(df._datadir)))

            only_meta = [ts_meta.internal + '.meta']
            empty_suffix_hashes = {suffix: MD5_OF_EMPTY_STRING}

            # the tombstone is reclaimed, the meta file remains, the suffix
            # hash is not updated BUT the suffix dir cannot be deleted so
            # a suffix hash equal to hash of empty string is reported.
            # TODO: this is not same result as if the meta file did not exist!
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertEqual(only_meta, os.listdir(df._datadir))
            self.assertEqual(hashes, empty_suffix_hashes)

            # scale back the df manager's reclaim age even more - call to
            # get_hashes does not trigger reclaim because the suffix has
            # MD5_OF_EMPTY_STRING in hashes.pkl
            df_mgr.reclaim_age = 500
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertEqual(only_meta, os.listdir(df._datadir))
            self.assertEqual(hashes, empty_suffix_hashes)

            # call get_hashes with recalculate = [suffix] and the suffix dir
            # gets re-hashed so the .meta is finally reclaimed.
            hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
            suffix_dir = os.path.dirname(df._datadir)
            self.assertFalse(os.path.exists(suffix_dir))
            self.assertEqual(hashes, {})

    def test_hash_suffix_one_reclaim_tombstone(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(
                'sda1', '0', 'a', 'c', 'o', policy=policy)
            # scale back this tests manager's reclaim age a bit
            df_mgr.reclaim_age = 1000
            # write a tombstone that's just a *little* older
            reclaimable = Timestamp(time() - 1001)
            df.delete(reclaimable.internal)
            # the only file in the suffix gets reclaimed, leaving no hashes
            self.assertEqual(
                df_mgr.get_hashes('sda1', '0', [], policy), {})

    def test_hash_suffix_one_reclaim_and_one_valid_tombstone(self):
        for policy in self.iter_policies():
            paths, suffix = find_paths_with_matching_suffixes(2, 1)
            df_mgr = self.df_router[policy]
            account, container, obj = paths[suffix][0]
            old_df = df_mgr.get_diskfile(
                'sda1', '0', account, container, obj, policy=policy)
            # scale back this tests manager's reclaim age a bit
            df_mgr.reclaim_age = 1000
            # write one tombstone that's just a *little* older
            old_df.delete(Timestamp(time() - 1001))
            # create another tombstone in same suffix dir that's newer
            account, container, obj = paths[suffix][1]
            new_df = df_mgr.get_diskfile(
                'sda1', '0', account, container, obj, policy=policy)
            ts_new = Timestamp(time() - 900)
            new_df.delete(ts_new)

            hashes = df_mgr.get_hashes('sda1', '0', [], policy)

            # only the newer tombstone is expected in the suffix hash
            suffix = os.path.basename(os.path.dirname(old_df._datadir))
            new_tombstone_hash = md5(ts_new.internal + '.ts').hexdigest()
            expected_by_type = {
                REPL_POLICY: {suffix: new_tombstone_hash},
                # fi is None here because we have a tombstone
                EC_POLICY: {suffix: {None: new_tombstone_hash}},
            }

            self.assertEqual(hashes, expected_by_type[policy.policy_type])

    def test_hash_suffix_one_datafile(self):
        test_data = 'test file'
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(
                'sda1', '0', 'a', 'c', 'o', policy=policy, frag_index=7)
            suffix = os.path.basename(os.path.dirname(df._datadir))
            # write a datafile
            timestamp = self.ts()
            with df.create() as writer:
                writer.write(test_data)
                writer.put({
                    'X-Timestamp': timestamp.internal,
                    'ETag': md5(test_data).hexdigest(),
                    'Content-Length': len(test_data),
                })
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            # the name that gets hashed depends on the policy type
            hashed_name = {
                EC_POLICY: timestamp.internal,
                REPL_POLICY: timestamp.internal + '.data',
            }[policy.policy_type]
            datafile_hash = md5(hashed_name).hexdigest()
            expected_by_type = {
                REPL_POLICY: {suffix: datafile_hash},
                # because there's no .durable file, we have no hash for
                # the None key - only the frag index for the data file
                EC_POLICY: {suffix: {7: datafile_hash}},
            }
            expected = expected_by_type[policy.policy_type]
            msg = 'expected %r != %r for policy %r' % (
                expected, hashes, policy)
            self.assertEqual(hashes, expected, msg)

    def test_hash_suffix_multi_file_ends_in_tombstone(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy,
                                     frag_index=4)
            suffix = os.path.basename(os.path.dirname(df._datadir))
            mkdirs(df._datadir)
            now = time()
            is_ec = policy.policy_type == EC_POLICY
            # go behind the scenes and setup a bunch of weird file names
            for tdiff in [500, 100, 10, 1]:
                for suff in ['.meta', '.data', '.ts']:
                    filename = Timestamp(now - tdiff).internal
                    if is_ec and suff == '.data':
                        filename += '#%s' % df._frag_index
                    filename += suff
                    with open(os.path.join(df._datadir, filename), 'w'):
                        pass
            # the last file written is the newest tombstone
            newest_tombstone = filename
            tombstone_hash = md5(newest_tombstone).hexdigest()
            # call get_hashes and it should clean things up
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            expected_by_type = {
                REPL_POLICY: {suffix: tombstone_hash},
                # fi is None here because we have a tombstone
                EC_POLICY: {suffix: {None: tombstone_hash}},
            }
            self.assertEqual(hashes, expected_by_type[policy.policy_type])
            # only the tombstone should be left
            self.assertEqual(os.listdir(df._datadir), [newest_tombstone])

    def test_hash_suffix_multi_file_ends_in_datafile(self):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy,
                                     frag_index=4)
            suffix = os.path.basename(os.path.dirname(df._datadir))
            mkdirs(df._datadir)
            now = time()
            is_ec = policy.policy_type == EC_POLICY
            timestamp = None
            # go behind the scenes and setup a bunch of weird file names
            for tdiff in [500, 100, 10, 1]:
                timestamp = Timestamp(now - tdiff)
                suffs = ['.meta', '.data']
                if tdiff > 50:
                    suffs.append('.ts')
                if is_ec:
                    suffs.append('.durable')
                for suff in suffs:
                    filename = timestamp.internal
                    if is_ec and suff == '.data':
                        filename += '#%s' % df._frag_index
                    filename += suff
                    with open(os.path.join(df._datadir, filename), 'w'):
                        pass
            # plus one meta file newer than everything else
            metadata_filename = Timestamp(now).internal + '.meta'
            with open(os.path.join(df._datadir, metadata_filename), 'w'):
                pass

            # call get_hashes and it should clean things up
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)

            # timestamp now refers to the newest data file written above
            data_filename = timestamp.internal
            if is_ec:
                data_filename += '#%s' % df._frag_index
            data_filename += '.data'
            if is_ec:
                durable_filename = timestamp.internal + '.durable'
                expected = {
                    suffix: {
                        # metadata & durable updates are hashed separately
                        None: md5(metadata_filename +
                                  durable_filename).hexdigest(),
                        4: self.fname_to_ts_hash(data_filename),
                    }
                }
                expected_files = [data_filename, durable_filename,
                                  metadata_filename]
            elif policy.policy_type == REPL_POLICY:
                expected = {
                    suffix: md5(metadata_filename +
                                data_filename).hexdigest()}
                expected_files = [data_filename, metadata_filename]
            else:
                self.fail('unknown policy type %r' % policy.policy_type)
            msg = 'expected %r != %r for policy %r' % (
                expected, hashes, policy)
            self.assertEqual(hashes, expected, msg)
            # only the meta and data should be left
            self.assertEqual(sorted(os.listdir(df._datadir)),
                             sorted(expected_files))

    def _verify_get_hashes(self, filenames, ts_data, ts_meta, ts_ctype,
                           policy):
        """
        Create the given files in an object hash dir, then assert that
        get_hashes reports the suffix hash implied by the given timestamps.

        :param filenames: list of filenames to create in an object hash dir
        :param ts_data: newest data timestamp, used for expected result
        :param ts_meta: newest meta timestamp, used for expected result
        :param ts_ctype: newest content-type timestamp, used for expected
                         result; a false value leaves content-type out of
                         the expected hash
        :param policy: storage policy to use for test
        """
        df_mgr = self.df_router[policy]
        df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                 policy=policy, frag_index=4)
        hash_dir = df._datadir
        suffix = os.path.basename(os.path.dirname(hash_dir))
        mkdirs(hash_dir)

        # the data timestamp is hashed with a policy specific extension
        is_ec = policy.policy_type == EC_POLICY
        if is_ec:
            data_ext = '.durable'
        elif policy.policy_type == REPL_POLICY:
            data_ext = '.data'
        else:
            self.fail('unknown policy type %r' % policy.policy_type)

        # calculate expected result
        hasher = md5()
        hasher.update(ts_meta.internal + '.meta')
        hasher.update(ts_data.internal + data_ext)
        if ts_ctype:
            hasher.update(ts_ctype.internal + '_ctype')
        if is_ec:
            # EC keeps a separate hash per fragment index (4 here)
            expected = {
                suffix: {
                    None: hasher.hexdigest(),
                    4: md5(ts_data.internal).hexdigest(),
                }
            }
        else:
            expected = {suffix: hasher.hexdigest()}

        # create the (empty) on disk files
        for name in filenames:
            open(os.path.join(hash_dir, name), 'w').close()

        hashes = df_mgr.get_hashes('sda1', '0', [], policy)

        msg = 'expected %r != %r for policy %r' % (
            expected, hashes, policy)
        self.assertEqual(hashes, expected, msg)

    def test_hash_suffix_with_older_content_type_in_meta(self):
        # single meta file having older content-type; timestamps are drawn
        # in the order data, content-type, meta
        for policy in self.iter_policies():
            ts_data = self.ts()
            ts_ctype = self.ts()
            ts_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_meta, ts_ctype),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_meta, ts_ctype, policy)

    def test_hash_suffix_with_same_age_content_type_in_meta(self):
        # single meta file having same age content-type, i.e. the meta
        # timestamp doubles as the content-type timestamp
        for policy in self.iter_policies():
            ts_data = self.ts()
            ts_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_meta, ts_meta),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_meta, ts_meta, policy)

    def test_hash_suffix_with_obsolete_content_type_in_meta(self):
        # After rsync replication we could have a single meta file having
        # content-type older than a replicated data file; the content-type
        # timestamp is therefore drawn before the data timestamp and is
        # expected to be left out of the hash
        for policy in self.iter_policies():
            ts_ctype = self.ts()
            ts_data = self.ts()
            ts_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_meta, ts_ctype),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_meta, None, policy)

    def test_hash_suffix_with_older_content_type_in_newer_meta(self):
        # After rsync replication we could have two meta files: newest
        # content-type is in newer meta file, older than newer meta file
        for policy in self.iter_policies():
            ts_data = self.ts()
            ts_older_meta = self.ts()
            ts_ctype = self.ts()
            ts_newer_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_older_meta),
                self._metafilename(ts_newer_meta, ts_ctype),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_newer_meta, ts_ctype, policy)

    def test_hash_suffix_with_same_age_content_type_in_newer_meta(self):
        # After rsync replication we could have two meta files: newest
        # content-type is in newer meta file, at same age as newer meta file
        for policy in self.iter_policies():
            ts_data, ts_older_meta, ts_newer_meta = (
                self.ts() for _ in range(3))

            # the older meta file has to exist on disk as well, otherwise
            # this scenario only repeats the single meta file case
            filenames = [self._datafilename(ts_data, policy, frag_index=4),
                         self._metafilename(ts_older_meta),
                         self._metafilename(ts_newer_meta, ts_newer_meta)]
            if policy.policy_type == EC_POLICY:
                filenames.append(ts_data.internal + '.durable')

            # only the newer meta file should contribute to the hash, for
            # both its meta timestamp and its content-type timestamp
            self._verify_get_hashes(
                filenames, ts_data, ts_newer_meta, ts_newer_meta, policy)

    def test_hash_suffix_with_older_content_type_in_older_meta(self):
        # After rsync replication we could have two meta files: newest
        # content-type is in older meta file, older than older meta file
        for policy in self.iter_policies():
            ts_data = self.ts()
            ts_ctype = self.ts()
            ts_older_meta = self.ts()
            ts_newer_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_newer_meta),
                self._metafilename(ts_older_meta, ts_ctype),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_newer_meta, ts_ctype, policy)

    def test_hash_suffix_with_same_age_content_type_in_older_meta(self):
        # After rsync replication we could have two meta files: newest
        # content-type is in older meta file, at same age as older meta file
        for policy in self.iter_policies():
            ts_data = self.ts()
            ts_older_meta = self.ts()
            ts_newer_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_newer_meta),
                self._metafilename(ts_older_meta, ts_older_meta),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_newer_meta, ts_older_meta, policy)

    def test_hash_suffix_with_obsolete_content_type_in_older_meta(self):
        # After rsync replication we could have two meta files: newest
        # content-type is in older meta file, but older than data file, so
        # no content-type timestamp is expected in the hash
        for policy in self.iter_policies():
            ts_ctype = self.ts()
            ts_data = self.ts()
            ts_older_meta = self.ts()
            ts_newer_meta = self.ts()

            on_disk = [
                self._datafilename(ts_data, policy, frag_index=4),
                self._metafilename(ts_newer_meta),
                self._metafilename(ts_older_meta, ts_ctype),
            ]
            if policy.policy_type == EC_POLICY:
                on_disk.append(ts_data.internal + '.durable')

            self._verify_get_hashes(
                on_disk, ts_data, ts_newer_meta, None, policy)

    def test_hash_suffix_removes_empty_hashdir_and_suffix(self):
        # an empty hash dir is the only content of its suffix dir, so after
        # get_hashes the suffix dir itself should be gone
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                     policy=policy, frag_index=2)
            hash_dir = df._datadir
            suffix_dir = os.path.dirname(hash_dir)
            os.makedirs(hash_dir)
            self.assertTrue(os.path.exists(hash_dir))  # sanity
            df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertFalse(os.path.exists(suffix_dir))

    def test_hash_suffix_removes_empty_hashdirs_in_valid_suffix(self):
        # one populated hash dir shares its suffix with a couple of empty
        # ones; get_hashes should remove only the empty ones
        paths, suffix = find_paths_with_matching_suffixes(needed_matches=3,
                                                          needed_suffixes=0)
        matching_paths = paths.pop(suffix)
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile('sda1', '0', *matching_paths[0],
                                     policy=policy, frag_index=2)
            # a tombstone makes this a real, valid hsh_path
            df.delete(Timestamp(time()))
            # every other matching path only gets an empty hsh_path
            empty_hsh_paths = []
            for obj_path in matching_paths[1:]:
                empty_dir = df_mgr.get_diskfile(
                    'sda1', '0', *obj_path, policy=policy)._datadir
                os.makedirs(empty_dir)
                empty_hsh_paths.append(empty_dir)
            for empty_dir in empty_hsh_paths:
                self.assertTrue(os.path.exists(empty_dir))  # sanity
            # get_hashes will cleanup empty hsh_path and leave valid one
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertIn(suffix, hashes)
            self.assertTrue(os.path.exists(df._datadir))
            for empty_dir in empty_hsh_paths:
                self.assertFalse(os.path.exists(empty_dir))

    # get_hashes tests - hash_suffix error handling

    def test_hash_suffix_listdir_enotdir(self):
        # a suffix that is an empty dir, or not a dir at all, yields no hash
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            suffix = '123'
            suffix_path = os.path.join(self.devices, 'sda1',
                                       diskfile.get_data_dir(policy), '0',
                                       suffix)

            def assert_no_hashes(found):
                expected = {}
                msg = 'expected %r != %r for policy %r' % (
                    expected, found, policy)
                self.assertEqual(found, expected, msg)

            # first pass: the suffix path is an empty directory
            os.makedirs(suffix_path)
            self.assertTrue(os.path.exists(suffix_path))  # sanity
            hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
            # suffix dir cleaned up by get_hashes
            self.assertFalse(os.path.exists(suffix_path))
            assert_no_hashes(hashes)

            # second pass: now make the suffix path a file
            open(suffix_path, 'w').close()
            hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
            assert_no_hashes(hashes)

    def test_hash_suffix_listdir_enoent(self):
        # asking for a recalc of a suffix that does not exist still results
        # in an (unsuccessful) listdir of that suffix
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            real_listdir = os.listdir
            listdir_calls = []

            def recording_listdir(path):
                # record every call together with whether it succeeded
                outcome = [path, False]
                try:
                    listing = real_listdir(path)
                    outcome[1] = True
                    return listing
                finally:
                    listdir_calls.append(tuple(outcome))

            with mock.patch('swift.obj.diskfile.os.listdir',
                            recording_listdir):
                # recalc always forces hash_suffix even if the suffix
                # does not exist!
                df_mgr.get_hashes('sda1', '0', ['123'], policy)

            part_path = os.path.join(self.devices, 'sda1',
                                     diskfile.get_data_dir(policy), '0')
            missing_suffix_path = os.path.join(part_path, '123')

            expected_calls = [
                # part path gets created automatically
                (part_path, True),
                # this one blows up
                (missing_suffix_path, False),
            ]
            self.assertEqual(listdir_calls, expected_calls)

    def test_hash_suffix_cleanup_ondisk_files_enotdir_quarantined(self):
        # a regular file sitting where an object hash dir belongs should be
        # moved to quarantine when its suffix is rehashed
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(
                self.existing_device, '0', 'a', 'c', 'o', policy=policy)
            hash_path_name = os.path.basename(df._datadir)
            # make the suffix directory
            suffix_path = os.path.dirname(df._datadir)
            suffix = os.path.basename(suffix_path)
            os.makedirs(suffix_path)

            # make the df hash path a file
            open(df._datadir, 'wb').close()
            hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
                                       policy)
            self.assertEqual(hashes, {})
            # and hash path is quarantined
            self.assertFalse(os.path.exists(df._datadir))
            # the quarantine path is built from, in order: the quarantine
            # root of the device, the per-policy data dir, the first dir
            # from which the quarantined file was removed, and the name of
            # the quarantined file
            quarantine_parts = [
                self.devices, self.existing_device, 'quarantined',
                diskfile.get_data_dir(policy),
                suffix,
                hash_path_name,
            ]
            quarantine_path = os.path.join(*quarantine_parts)
            self.assertTrue(os.path.exists(quarantine_path))

    def test_hash_suffix_cleanup_ondisk_files_other_oserror(self):
        # an EACCES error while listing the object hash dir should leave
        # the suffix in the result with a hash of None
        for policy in self.iter_policies():
            timestamp = self.ts()
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c',
                                     'o', policy=policy,
                                     frag_index=7)
            suffix = os.path.basename(os.path.dirname(df._datadir))
            test_data = 'test_data'
            with df.create() as writer:
                writer.write(test_data)
                writer.put({
                    'X-Timestamp': timestamp.internal,
                    'ETag': md5(test_data).hexdigest(),
                    'Content-Length': len(test_data),
                })

            part_path = os.path.join(self.devices, self.existing_device,
                                     diskfile.get_data_dir(policy), '0')
            suffix_path = os.path.join(part_path, suffix)
            datadir_path = os.path.join(suffix_path, hash_path('a', 'c', 'o'))

            real_listdir = os.listdir
            listdir_calls = []

            def failing_listdir(path):
                listdir_calls.append(path)
                if path != datadir_path:
                    # the part and suffix listdir calls are passed through
                    return real_listdir(path)
                # listing the hash dir itself fails so that
                # cleanup_ondisk_files raises an exception
                raise OSError(errno.EACCES, os.strerror(errno.EACCES))

            with mock.patch('os.listdir', failing_listdir):
                hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                           policy)

            self.assertEqual(listdir_calls,
                             [part_path, suffix_path, datadir_path])
            expected = {suffix: None}
            msg = 'expected %r != %r for policy %r' % (
                expected, hashes, policy)
            self.assertEqual(hashes, expected, msg)

    def test_hash_suffix_rmdir_hsh_path_oserror(self):
        # when every rmdir fails the empty hash dir stays on disk and the
        # suffix is still reported, with an "empty" hash for the policy
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # make an empty hsh_path to be removed
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c',
                                     'o', policy=policy)
            hash_dir = df._datadir
            os.makedirs(hash_dir)
            suffix = os.path.basename(os.path.dirname(hash_dir))
            with mock.patch('os.rmdir', side_effect=OSError()):
                hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                           policy)
            empty_hash_by_type = {
                EC_POLICY: {},
                REPL_POLICY: md5().hexdigest(),
            }
            expected = empty_hash_by_type[policy.policy_type]
            self.assertEqual(hashes, {suffix: expected})
            self.assertTrue(os.path.exists(hash_dir))

    def test_hash_suffix_rmdir_suffix_oserror(self):
        # rmdir is attempted on the hash dir and then on the suffix dir; a
        # failure on the suffix dir leaves it on disk and still reported
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # make an empty hsh_path to be removed
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c',
                                     'o', policy=policy)
            os.makedirs(df._datadir)
            suffix_path = os.path.dirname(df._datadir)
            suffix = os.path.basename(suffix_path)

            rmdir_calls = []

            def fake_rmdir(path):
                # nothing is really removed; only the suffix dir "fails"
                rmdir_calls.append(path)
                if path == suffix_path:
                    raise OSError('kaboom!')

            with mock.patch('os.rmdir', fake_rmdir):
                hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                           policy)
            empty_hash_by_type = {
                EC_POLICY: {},
                REPL_POLICY: md5().hexdigest(),
            }
            expected = empty_hash_by_type[policy.policy_type]
            self.assertEqual(hashes, {suffix: expected})
            self.assertTrue(os.path.exists(suffix_path))
            self.assertEqual([df._datadir, suffix_path], rmdir_calls)

    # get_hashes tests - behaviors

    def test_get_hashes_creates_partition_and_pkl(self):
        # get_hashes on a partition that does not exist yet creates both
        # the partition dir and the hashes file inside it
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            part_path = os.path.join(
                self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)

            hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                       policy)
            self.assertEqual(hashes, {})
            self.assertTrue(os.path.exists(part_path))
            self.assertTrue(os.path.exists(hashes_file))

            # and double check the hashes
            new_hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                           policy)
            self.assertEqual(hashes, new_hashes)

    def test_get_hashes_new_pkl_finds_new_suffix_dirs(self):
        # without an existing hashes file get_hashes discovers suffix dirs
        # on its own
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            hashes_file = os.path.join(
                self.devices, self.existing_device,
                diskfile.get_data_dir(policy), '0', diskfile.HASH_FILE)
            # add something to find
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c',
                                     'o', policy=policy, frag_index=4)
            df.delete(self.ts())
            suffix = os.path.basename(os.path.dirname(df._datadir))
            # get_hashes will find the untracked suffix dir
            self.assertFalse(os.path.exists(hashes_file))  # sanity
            hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
            self.assertIn(suffix, hashes)
            # ... and create a hashes pickle for it
            self.assertTrue(os.path.exists(hashes_file))

    def test_get_hashes_old_pickle_does_not_find_new_suffix_dirs(self):
        # once a hashes file exists, a suffix dir added behind its back is
        # only noticed when a recalc of that suffix is requested
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # create a empty stale pickle
            hashes_file = os.path.join(
                self.devices, 'sda1', diskfile.get_data_dir(policy), '0',
                diskfile.HASH_FILE)
            hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
            self.assertEqual(hashes, {})
            self.assertTrue(os.path.exists(hashes_file))  # sanity
            # add something to find
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o',
                                     policy=policy, frag_index=4)
            hash_dir = df._datadir
            os.makedirs(hash_dir)
            tombstone = Timestamp(time()).internal + '.ts'
            open(os.path.join(hash_dir, tombstone), 'w').close()
            suffix = os.path.basename(os.path.dirname(hash_dir))
            # but get_hashes has no reason to find it (because we didn't
            # call invalidate_hash)
            new_hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                           policy)
            self.assertEqual(new_hashes, hashes)
            # ... unless remote end asks for a recalc
            hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
                                       policy)
            self.assertIn(suffix, hashes)

    def test_get_hashes_does_not_rehash_known_suffix_dirs(self):
        # a suffix already recorded in the hashes file is not rehashed
        # until a recalc of that suffix is requested
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c',
                                     'o', policy=policy, frag_index=4)
            suffix_path = os.path.dirname(df._datadir)
            suffix = os.path.basename(suffix_path)
            df.delete(self.ts())
            # create the baseline hashes file
            hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
            self.assertIn(suffix, hashes)
            # now change the contents of the suffix w/o calling
            # invalidate_hash
            rmtree(df._datadir)
            self.assertTrue(os.path.exists(suffix_path))  # sanity
            new_hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                           policy)
            # ... and get_hashes is none the wiser
            self.assertEqual(new_hashes, hashes)

            # ... unless remote end asks for a recalc
            hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
                                       policy)
            self.assertNotEqual(new_hashes, hashes)
            # and the empty suffix path is removed
            self.assertFalse(os.path.exists(suffix_path))
            # ... and the suffix key is removed
            self.assertEqual({}, hashes)

    def test_get_hashes_multi_file_multi_suffix(self):
        # Three suffixes are populated: one holding a tombstone, one holding
        # a single data file, and one holding data files for two different
        # object hashes; get_hashes should report a hash for each of them.
        paths, suffix = find_paths_with_matching_suffixes(needed_matches=2,
                                                          needed_suffixes=3)
        matching_paths = paths.pop(suffix)
        matching_paths.sort(key=lambda path: hash_path(*path))
        # one object path from each of (up to) two other suffixes
        other_paths = [group[0] for group in list(paths.values())[:2]]

        def suffix_of(df):
            return os.path.basename(os.path.dirname(df._datadir))

        def put_object(df, timestamp):
            # write and commit a small object at the given timestamp
            test_data = 'test_file'
            with df.create() as writer:
                writer.write(test_data)
                writer.put({
                    'X-Timestamp': timestamp.internal,
                    'ETag': md5(test_data).hexdigest(),
                    'Content-Length': len(test_data),
                })
                writer.commit(timestamp)

        def data_filename(df, timestamp, policy):
            # EC data file names carry the fragment index
            name = timestamp.internal
            if policy.policy_type == EC_POLICY:
                name += '#%d' % df._frag_index
            return name + '.data'

        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # first we'll make a tombstone
            df = df_mgr.get_diskfile(self.existing_device, '0',
                                     *other_paths[0], policy=policy,
                                     frag_index=4)
            timestamp = self.ts()
            df.delete(timestamp)
            tombstone_hash = md5(timestamp.internal + '.ts').hexdigest()
            tombstone_suffix = suffix_of(df)

            # second file in another suffix has a .datafile
            df = df_mgr.get_diskfile(self.existing_device, '0',
                                     *other_paths[1], policy=policy,
                                     frag_index=5)
            timestamp = self.ts()
            put_object(df, timestamp)
            datafile_name = data_filename(df, timestamp, policy)
            durable_hash = md5(timestamp.internal + '.durable').hexdigest()
            datafile_suffix = suffix_of(df)

            # in the *third* suffix - two datafiles for different hashes
            df = df_mgr.get_diskfile(self.existing_device, '0',
                                     *matching_paths[0], policy=policy,
                                     frag_index=6)
            matching_suffix = suffix_of(df)
            timestamp = self.ts()
            put_object(df, timestamp)
            # we'll keep track of file names for hash calculations
            data_names = {6: data_filename(df, timestamp, policy)}
            durable_names = [timestamp.internal + '.durable']

            df = df_mgr.get_diskfile(self.existing_device, '0',
                                     *matching_paths[1], policy=policy,
                                     frag_index=7)
            self.assertEqual(suffix_of(df), matching_suffix)  # sanity
            timestamp = self.ts()
            put_object(df, timestamp)
            data_names[7] = data_filename(df, timestamp, policy)
            durable_names.append(timestamp.internal + '.durable')

            # now make up the expected suffixes!
            hasher = md5()
            if policy.policy_type == EC_POLICY:
                for name in durable_names:
                    hasher.update(name)
                expected = {
                    tombstone_suffix: {
                        None: tombstone_hash,
                    },
                    datafile_suffix: {
                        None: durable_hash,
                        5: self.fname_to_ts_hash(datafile_name),
                    },
                    matching_suffix: {
                        None: hasher.hexdigest(),
                        6: self.fname_to_ts_hash(data_names[6]),
                        7: self.fname_to_ts_hash(data_names[7]),
                    },
                }
            elif policy.policy_type == REPL_POLICY:
                for name in data_names.values():
                    hasher.update(name)
                expected = {
                    tombstone_suffix: tombstone_hash,
                    datafile_suffix: md5(datafile_name).hexdigest(),
                    matching_suffix: hasher.hexdigest(),
                }
            else:
                self.fail('unknown policy type %r' % policy.policy_type)
            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
            self.assertEqual(hashes, expected)

    # get_hashes tests - error handling

    def test_get_hashes_bad_dev(self):
        # with mount_check enabled, a failing check_mount makes get_hashes
        # raise DiskFileDeviceUnavailable
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            df_mgr.mount_check = True
            failing_check = mock.MagicMock(side_effect=[False])
            with mock.patch('swift.obj.diskfile.check_mount',
                            failing_check):
                with self.assertRaises(DiskFileDeviceUnavailable):
                    df_mgr.get_hashes(self.existing_device, '0', ['123'],
                                      policy)

    def test_get_hashes_zero_bytes_pickle(self):
        # an empty (zero-byte) hashes file is tolerated by get_hashes
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            part_path = os.path.join(self.devices, self.existing_device,
                                     diskfile.get_data_dir(policy), '0')
            os.makedirs(part_path)
            # create a pre-existing zero-byte file
            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
            open(hashes_file, 'w').close()
            hashes = df_mgr.get_hashes(self.existing_device, '0', [],
                                       policy)
            self.assertEqual(hashes, {})

    def test_get_hashes_hash_suffix_enotdir(self):
        # a plain file in the partition dir is not reported as a suffix
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            # create a real suffix dir
            df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c',
                                     'o', policy=policy, frag_index=3)
            df.delete(Timestamp(time()))
            suffix = os.path.basename(os.path.dirname(df._datadir))
            # touch a bad suffix dir
            bad_suffix_path = os.path.join(
                self.devices, self.existing_device,
                diskfile.get_data_dir(policy), '0', 'bad')
            open(bad_suffix_path, 'w').close()
            hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
            self.assertIn(suffix, hashes)
            self.assertNotIn('bad', hashes)

    def test_get_hashes_hash_suffix_other_oserror(self):
        # an EACCES error from listdir while rehashing a suffix is logged
        # and leaves the suffix hash as None
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            suffix = '123'
            suffix_path = os.path.join(self.devices, self.existing_device,
                                       diskfile.get_data_dir(policy), '0',
                                       suffix)

            def assert_hashes(found, expected):
                msg = 'expected %r != %r for policy %r' % (expected, found,
                                                           policy)
                self.assertEqual(found, expected, msg)

            os.makedirs(suffix_path)
            self.assertTrue(os.path.exists(suffix_path))  # sanity
            hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
                                       policy)
            assert_hashes(hashes, {})

            # this OSError does *not* raise PathNotDir, and is allowed to leak
            # from hash_suffix into get_hashes
            access_error = OSError(errno.EACCES, os.strerror(errno.EACCES))
            mocked_os_listdir = mock.Mock(side_effect=access_error)
            with mock.patch("os.listdir", mocked_os_listdir):
                with mock.patch('swift.obj.diskfile.logging') as mock_logging:
                    hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
            self.assertEqual(mock_logging.method_calls,
                             [mock.call.exception('Error hashing suffix')])
            # recalc always causes a suffix to get reset to None; the listdir
            # error prevents the suffix from being rehashed
            assert_hashes(hashes, {'123': None})

    def test_get_hashes_modified_recursive_retry(self):
        # Patch getmtime so the hashes file appears to change for the first
        # few lookups, then check the exact sequence of mtimes that
        # get_hashes was handed before the value stopped moving.
        for policy in self.iter_policies():
            manager = self.df_router[policy]
            # an initial call with no suffixes leaves an empty pickle behind
            manager.get_hashes(self.existing_device, '0', [], policy)
            pickle_path = os.path.join(
                self.devices, self.existing_device,
                diskfile.get_data_dir(policy), '0', diskfile.HASH_FILE)
            mtime = os.path.getmtime(pickle_path)

            observed = []
            clock = {'mtime': mtime}

            def fake_getmtime(filename):
                current = clock['mtime']
                # record exactly the value returned by every call
                observed.append(current)
                if len(observed) <= 4:
                    # each of the first four calls makes the *next* call
                    # see a slightly newer mtime
                    clock['mtime'] = current + 1
                return current

            with mock.patch('swift.obj.diskfile.getmtime', fake_getmtime):
                manager.get_hashes(
                    self.existing_device, '0', ['123'], policy)

            expected = [mtime + delta for delta in (
                0,  # read
                1,  # modified
                2,  # read
                3,  # modified
                4,  # read
                4,  # not modified
            )]
            self.assertEqual(observed, expected)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()