diff --git a/swiftonfile/swift/obj/diskfile.py b/swiftonfile/swift/obj/diskfile.py index f6eb546..a549327 100644 --- a/swiftonfile/swift/obj/diskfile.py +++ b/swiftonfile/swift/obj/diskfile.py @@ -651,11 +651,13 @@ class DiskFile(object): raise DiskFileNotExist raise try: - self._stat = do_fstat(self._fd) + if not self._stat: + self._stat = do_fstat(self._fd) self._is_dir = stat.S_ISDIR(self._stat.st_mode) obj_size = self._stat.st_size - self._metadata = read_metadata(self._fd) + if not self._metadata: + self._metadata = read_metadata(self._fd) if not validate_object(self._metadata, self._stat): self._metadata = create_object_metadata(self._fd, self._stat, self._metadata) @@ -768,12 +770,43 @@ class DiskFile(object): This method is invoked by Swift code in POST, PUT, HEAD and DELETE path metadata = disk_file.read_metadata() + The operations performed here is very similar to those made in open(). + This is to avoid opening and closing of file (two syscalls over + network). IOW, this optimization addresses the case where the fd + returned by open() isn't going to be used i.e the file is not read (GET + or metadata recalculation) + :returns: metadata dictionary for an object :raises DiskFileError: this implementation will raise the same errors as the `open()` method. """ - with self.open(): - return self.get_metadata() + try: + self._metadata = read_metadata(self._data_file) + except (OSError, IOError) as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + raise DiskFileNotExist + raise err + + if self._metadata and self._is_object_expired(self._metadata): + raise DiskFileExpired(metadata=self._metadata) + + try: + self._stat = do_stat(self._data_file) + self._is_dir = stat.S_ISDIR(self._stat.st_mode) + except (OSError, IOError) as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + raise DiskFileNotExist + raise err + + if not validate_object(self._metadata, self._stat): + # Metadata is stale/invalid. So open the object for reading + # to update Etag and other metadata. + with self.open(): + return self.get_metadata() + else: + # Metadata is valid. Don't have to open the file. + self._filter_metadata() + return self._metadata def reader(self, iter_hook=None, keep_cache=False): """ @@ -1096,12 +1129,12 @@ class DiskFile(object): errors as the `create()` method. """ try: - metadata = read_metadata(self._data_file) + metadata = self._metadata or read_metadata(self._data_file) except (IOError, OSError) as err: - if err.errno != errno.ENOENT: + if err.errno not in (errno.ESTALE, errno.ENOENT): raise else: - if metadata[X_TIMESTAMP] >= timestamp: + if metadata and metadata[X_TIMESTAMP] >= timestamp: return self._threadpool.run_in_thread(self._unlinkold) diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index bc38790..bb37cd8 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -211,6 +211,55 @@ class TestDiskFile(unittest.TestCase): assert gdf._disk_file_open is True assert gdf._disk_file_open is False + def test_read_metadata_optimize_open_close(self): + the_path = os.path.join(self.td, "vol0", "ufo47", "bar") + the_file = os.path.join(the_path, "z") + os.makedirs(the_path) + with open(the_file, "wb") as fd: + fd.write("1234") + init_md = { + 'X-Type': 'Object', + 'X-Object-Type': 'file', + 'Content-Length': 4, + 'ETag': md5("1234").hexdigest(), + 'X-Timestamp': normalize_timestamp(os.stat(the_file).st_ctime), + 'Content-Type': 'application/octet-stream'} + _metadata[_mapit(the_file)] = init_md + gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z") + assert gdf._obj == "z" + assert gdf._fd is None + assert gdf._disk_file_open is False + assert gdf._metadata is None + assert not gdf._is_dir + + # Case 1 + # Ensure that reading metadata for non-GET requests + # does not incur opening and closing the file when + # metadata is NOT stale. + mock_open = Mock() + mock_close = Mock() + with mock.patch("swiftonfile.swift.obj.diskfile.do_open", mock_open): + with mock.patch("swiftonfile.swift.obj.diskfile.do_close", + mock_close): + md = gdf.read_metadata() + self.assertEqual(md, init_md) + self.assertFalse(mock_open.called) + self.assertFalse(mock_close.called) + + # Case 2 + # Ensure that reading metadata for non-GET requests + # still opens and reads the file when metadata is stale + with open(the_file, "a") as fd: + # Append to the existing file to make the stored metadata + # invalid/stale. + fd.write("5678") + md = gdf.read_metadata() + # Check that the stale metadata is recalculated to account for + # change in file content + self.assertNotEqual(md, init_md) + self.assertEqual(md['Content-Length'], 8) + self.assertEqual(md['ETag'], md5("12345678").hexdigest()) + def test_open_and_close(self): mock_close = Mock()