use in-memory count for directory size.

Only check the directory size when we first start the
RollManager. From then on, incrementally add the payload
size. This will result in a difference between on-disk size
vs. file sizes. But, it's much faster than checking every N
times ... those stats() checks are very expensive.

Change-Id: I71b1455db0ea073a4f563f115a693027de50638b
This commit is contained in:
Sandy Walsh 2015-03-18 10:28:03 -03:00
parent 5f91c87739
commit f80109094b
2 changed files with 46 additions and 21 deletions

View File

@ -153,7 +153,13 @@ class WritingJSONRollManager(object):
self.directory = kwargs.get('directory', '.')
self.destination_directory = kwargs.get('destination_directory', '.')
self.roll_size_mb = int(kwargs.get('roll_size_mb', 1000))
self.check_delay = 0 # Only check directory size every N events.
# Read the directory at the start, but incrementally
# update the size as we write more files. Doing a full
# disk stat every time is way too expensive.
# Of course, this means we are not accounting for
# block size.
self.directory_size = self._get_directory_size()
def _make_filename(self, crc, prefix):
now = notification_utils.now()
@ -167,19 +173,22 @@ class WritingJSONRollManager(object):
return os.path.join(prefix, f)
def _should_tar(self):
return (self.directory_size / 1048576) >= self.roll_size_mb
def _get_directory_size(self):
size = 0
for f in os.listdir(self.directory):
full = os.path.join(self.directory, f)
if os.path.isfile(full):
size += os.path.getsize(full)
return (size / 1048576) >= self.roll_size_mb
return size
def _tar_directory(self):
# tar all the files in working directory into an archive
# in destination_directory.
crc = "archive"
filename = self._make_filename(crc, self.destination_directory) + ".tar.gz"
filename = self._make_filename(crc, self.destination_directory) + \
".tar.gz"
# No contextmgr for tarfile in 2.6 :(
tar = tarfile.open(filename, "w:gz")
@ -195,13 +204,6 @@ class WritingJSONRollManager(object):
if os.path.isfile(full):
os.remove(full)
def _delay_check(self):
self.check_delay += 1
if self.check_delay > 250:
self.check_delay = 0
return False
return True
def write(self, metadata, json_payload):
# Metadata is ignored.
crc = hashlib.sha256(json_payload).hexdigest()
@ -209,8 +211,7 @@ class WritingJSONRollManager(object):
with open(filename, "w") as f:
f.write(json_payload)
if self._delay_check():
return
self.directory_size += len(json_payload)
if not self._should_tar():
return
@ -218,6 +219,5 @@ class WritingJSONRollManager(object):
self._tar_directory()
self._clean_working_directory()
def close(self):
pass

View File

@ -95,7 +95,10 @@ class TestWriting(unittest.TestCase):
class TestJSONRollManager(unittest.TestCase):
def test_make_filename(self):
@mock.patch(
"shoebox.roll_manager.WritingJSONRollManager._get_directory_size")
def test_make_filename(self, gds):
gds.return_value = 1000
now = datetime.datetime(day=1, month=2, year=2014,
hour=10, minute=11, second=12)
with mock.patch.object(notification_utils, "now") as dt:
@ -110,20 +113,34 @@ class TestJSONRollManager(unittest.TestCase):
@mock.patch('os.path.getsize')
@mock.patch('os.listdir')
@mock.patch('os.path.isfile')
def test_should_tar(self, isf, ld, gs):
def test_get_directory_size(self, isf, ld, gs):
rm = roll_manager.WritingJSONRollManager("template.foo")
gs.return_value = 250000
ld.return_value = ['a', 'b', 'c']
isf.return_value = True
rm.roll_size_mb = 1
self.assertFalse(rm._should_tar())
self.assertEqual(250000*3, rm._get_directory_size())
ld.return_value = ['a', 'b', 'c', 'd', 'e', 'f']
self.assertEqual(250000*6, rm._get_directory_size())
@mock.patch(
"shoebox.roll_manager.WritingJSONRollManager._get_directory_size")
def test_should_tar(self, gds):
gds.return_value = 1000
rm = roll_manager.WritingJSONRollManager("template.foo")
rm.directory_size = 9 * 1048576
rm.roll_size_mb = 10
self.assertFalse(rm._should_tar())
rm.directory_size = 10 * 1048576
rm.roll_size_mb = 10
self.assertTrue(rm._should_tar())
@mock.patch('os.listdir')
@mock.patch('os.remove')
@mock.patch('os.path.isfile')
def test_clean_working_directory(self, isf, rem, ld):
@mock.patch(
"shoebox.roll_manager.WritingJSONRollManager._get_directory_size")
def test_clean_working_directory(self, gds, isf, rem, ld):
gds.return_value = 1000
isf.return_value = True
rm = roll_manager.WritingJSONRollManager("template.foo")
ld.return_value = ['a', 'b', 'c']
@ -133,9 +150,13 @@ class TestJSONRollManager(unittest.TestCase):
@mock.patch('os.listdir')
@mock.patch('tarfile.open')
@mock.patch('os.path.isfile')
def test_tar_directory(self, isf, to, ld):
@mock.patch(
"shoebox.roll_manager.WritingJSONRollManager._get_directory_size")
def test_tar_directory(self, gds, isf, to, ld):
gds.return_value = 1000
ld.return_value = ['a', 'b', 'c']
isf.return_value = True
gds = 1000
rm = roll_manager.WritingJSONRollManager("template.foo")
open_name = '%s.open' % roll_manager.__name__
@ -145,7 +166,10 @@ class TestJSONRollManager(unittest.TestCase):
rm._tar_directory()
self.assertTrue(to.called)
def test_write(self):
@mock.patch(
"shoebox.roll_manager.WritingJSONRollManager._get_directory_size")
def test_write(self, gds):
gds.return_value = 0
rm = roll_manager.WritingJSONRollManager("template.foo")
payload = "some big payload"
open_name = '%s.open' % roll_manager.__name__
@ -158,3 +182,4 @@ class TestJSONRollManager(unittest.TestCase):
self.assertTrue(mock_open.called_once_with(
"template.foo", "wb"))
self.assertFalse(td.called)
self.assertEqual(rm.directory_size, len(payload))