swift/test/unit/obj/test_updater.py

# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cPickle as pickle
import mock
import os
import unittest
from contextlib import closing
from gzip import GzipFile
from shutil import rmtree
from time import time
from distutils.dir_util import mkpath
from eventlet import spawn, Timeout, listen
from swift.obj import updater as object_updater
from swift.obj.diskfile import ASYNCDIR
from swift.common.ring import RingData
from swift.common import utils
from swift.common.utils import hash_path, normalize_timestamp, mkdirs, \
write_pickle
from test.unit import FakeLogger


class TestObjectUpdater(unittest.TestCase):
def setUp(self):
utils.HASH_PATH_SUFFIX = 'endcap'
utils.HASH_PATH_PREFIX = ''
self.testdir = os.path.join(os.path.dirname(__file__),
'object_updater')
rmtree(self.testdir, ignore_errors=1)
os.mkdir(self.testdir)
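        # Build a three-replica container ring whose devices all point at
        # 127.0.0.1/sda1, so get_container_ring() resolves without a real cluster.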
ring_file = os.path.join(self.testdir, 'container.ring.gz')
with closing(GzipFile(ring_file, 'wb')) as f:
pickle.dump(
RingData([[0, 1, 2, 0, 1, 2],
[1, 2, 0, 1, 2, 0],
[2, 3, 1, 2, 3, 1]],
[{'id': 0, 'ip': '127.0.0.1', 'port': 1,
'device': 'sda1', 'zone': 0},
{'id': 1, 'ip': '127.0.0.1', 'port': 1,
'device': 'sda1', 'zone': 2},
{'id': 2, 'ip': '127.0.0.1', 'port': 1,
'device': 'sda1', 'zone': 4}], 30),
f)
self.devices_dir = os.path.join(self.testdir, 'devices')
os.mkdir(self.devices_dir)
self.sda1 = os.path.join(self.devices_dir, 'sda1')
os.mkdir(self.sda1)
os.mkdir(os.path.join(self.sda1, 'tmp'))

    def tearDown(self):
rmtree(self.testdir, ignore_errors=1)

    def test_creation(self):
cu = object_updater.ObjectUpdater({
'devices': self.devices_dir,
'mount_check': 'false',
'swift_dir': self.testdir,
'interval': '1',
'concurrency': '2',
'node_timeout': '5'})
self.assert_(hasattr(cu, 'logger'))
self.assert_(cu.logger is not None)
self.assertEquals(cu.devices, self.devices_dir)
self.assertEquals(cu.interval, 1)
self.assertEquals(cu.concurrency, 2)
self.assertEquals(cu.node_timeout, 5)
self.assert_(cu.get_container_ring() is not None)

    def test_object_sweep(self):
prefix_dir = os.path.join(self.sda1, ASYNCDIR, 'abc')
mkpath(prefix_dir)
        # A non-directory where a directory is expected should just be skipped...
not_a_dir_path = os.path.join(self.sda1, ASYNCDIR, 'not_a_dir')
with open(not_a_dir_path, 'w'):
pass
objects = {
'a': [1089.3, 18.37, 12.83, 1.3],
'b': [49.4, 49.3, 49.2, 49.1],
'c': [109984.123],
}
expected = set()
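        # Only the newest pending update for each object should reach
        # process_object_update(); older duplicates are cleaned up by the sweep.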
for o, timestamps in objects.iteritems():
ohash = hash_path('account', 'container', o)
for t in timestamps:
o_path = os.path.join(prefix_dir, ohash + '-' +
normalize_timestamp(t))
if t == timestamps[0]:
expected.add(o_path)
write_pickle({}, o_path)
seen = set()
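
        # Stub out the real container update so the sweep just records which
        # pending files it visited.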
class MockObjectUpdater(object_updater.ObjectUpdater):
def process_object_update(self, update_path, device):
seen.add(update_path)
os.unlink(update_path)
cu = MockObjectUpdater({
'devices': self.devices_dir,
'mount_check': 'false',
'swift_dir': self.testdir,
'interval': '1',
'concurrency': '1',
'node_timeout': '5'})
cu.object_sweep(self.sda1)
self.assert_(not os.path.exists(prefix_dir))
self.assert_(os.path.exists(not_a_dir_path))
self.assertEqual(expected, seen)

    @mock.patch.object(object_updater, 'ismount')
def test_run_once_with_disk_unmounted(self, mock_ismount):
mock_ismount.return_value = False
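        # Every device reports as unmounted; only the updater below that enables
        # mount_check should notice and log an error.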
cu = object_updater.ObjectUpdater({
'devices': self.devices_dir,
'mount_check': 'false',
'swift_dir': self.testdir,
'interval': '1',
'concurrency': '1',
'node_timeout': '15'})
cu.run_once()
async_dir = os.path.join(self.sda1, ASYNCDIR)
os.mkdir(async_dir)
cu.run_once()
self.assert_(os.path.exists(async_dir))
# mount_check == False means no call to ismount
self.assertEqual([], mock_ismount.mock_calls)
cu = object_updater.ObjectUpdater({
'devices': self.devices_dir,
'mount_check': 'TrUe',
'swift_dir': self.testdir,
'interval': '1',
'concurrency': '1',
'node_timeout': '15'})
odd_dir = os.path.join(async_dir, 'not really supposed to be here')
os.mkdir(odd_dir)
cu.logger = FakeLogger()
cu.run_once()
self.assert_(os.path.exists(async_dir))
self.assert_(os.path.exists(odd_dir)) # skipped because not mounted!
# mount_check == True means ismount was checked
self.assertEqual([
mock.call(self.sda1),
], mock_ismount.mock_calls)
self.assertEqual(cu.logger.get_increment_counts(), {'errors': 1})

    @mock.patch.object(object_updater, 'ismount')
def test_run_once(self, mock_ismount):
mock_ismount.return_value = True
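        # Devices report as mounted, so the mount_check updater below actually
        # descends into sda1.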
cu = object_updater.ObjectUpdater({
'devices': self.devices_dir,
'mount_check': 'false',
'swift_dir': self.testdir,
'interval': '1',
'concurrency': '1',
'node_timeout': '15'})
cu.run_once()
async_dir = os.path.join(self.sda1, ASYNCDIR)
os.mkdir(async_dir)
cu.run_once()
self.assert_(os.path.exists(async_dir))
# mount_check == False means no call to ismount
self.assertEqual([], mock_ismount.mock_calls)
cu = object_updater.ObjectUpdater({
'devices': self.devices_dir,
'mount_check': 'TrUe',
'swift_dir': self.testdir,
'interval': '1',
'concurrency': '1',
'node_timeout': '15'})
odd_dir = os.path.join(async_dir, 'not really supposed to be here')
os.mkdir(odd_dir)
cu.run_once()
self.assert_(os.path.exists(async_dir))
self.assert_(not os.path.exists(odd_dir))
# mount_check == True means ismount was checked
self.assertEqual([
mock.call(self.sda1),
], mock_ismount.mock_calls)
ohash = hash_path('a', 'c', 'o')
odir = os.path.join(async_dir, ohash[-3:])
mkdirs(odir)
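        # Two pending updates for the same object: run_once should process the
        # newer one and unlink the older duplicate.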
older_op_path = os.path.join(
odir,
'%s-%s' % (ohash, normalize_timestamp(time() - 1)))
op_path = os.path.join(
odir,
'%s-%s' % (ohash, normalize_timestamp(time())))
for path in (op_path, older_op_path):
with open(path, 'wb') as async_pending:
pickle.dump({'op': 'PUT', 'account': 'a', 'container': 'c',
'obj': 'o', 'headers': {
'X-Container-Timestamp':
normalize_timestamp(0)}},
async_pending)
cu.logger = FakeLogger()
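        # Nothing is listening on the ring devices' port yet, so the update
        # fails, but the older duplicate pending file is still unlinked.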
cu.run_once()
self.assert_(not os.path.exists(older_op_path))
self.assert_(os.path.exists(op_path))
self.assertEqual(cu.logger.get_increment_counts(),
{'failures': 1, 'unlinks': 1})
self.assertEqual(None,
pickle.load(open(op_path)).get('successes'))
bindsock = listen(('127.0.0.1', 0))
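
        # Fake container servers: accepter() answers a single connection with a
        # canned status and checks the PUT it receives; accept() serves one
        # connection per expected status code.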
        def accepter(sock, return_code):
try:
with Timeout(3):
inc = sock.makefile('rb')
out = sock.makefile('wb')
out.write('HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
return_code)
out.flush()
self.assertEquals(inc.readline(),
'PUT /sda1/0/a/c/o HTTP/1.1\r\n')
headers = {}
line = inc.readline()
while line and line != '\r\n':
headers[line.split(':')[0].lower()] = \
line.split(':')[1].strip()
line = inc.readline()
self.assert_('x-container-timestamp' in headers)
except BaseException as err:
return err
return None

        def accept(return_codes):
codes = iter(return_codes)
try:
events = []
for x in xrange(len(return_codes)):
with Timeout(3):
sock, addr = bindsock.accept()
events.append(
spawn(accepter, sock, codes.next()))
for event in events:
err = event.wait()
if err:
raise err
except BaseException as err:
return err
return None
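
        # Replay the update against the fake servers: one 201 and two 500s, so
        # the pending file survives with node 0 recorded under 'successes'.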
event = spawn(accept, [201, 500, 500])
for dev in cu.get_container_ring().devs:
if dev is not None:
dev['port'] = bindsock.getsockname()[1]
cu.logger = FakeLogger()
cu.run_once()
err = event.wait()
if err:
raise err
self.assert_(os.path.exists(op_path))
self.assertEqual(cu.logger.get_increment_counts(),
{'failures': 1})
self.assertEqual([0],
pickle.load(open(op_path)).get('successes'))
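        # A 404 from a container server counts as done, so node 1 joins the
        # recorded successes while node 2 fails again.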
event = spawn(accept, [404, 500])
cu.logger = FakeLogger()
cu.run_once()
err = event.wait()
if err:
raise err
self.assert_(os.path.exists(op_path))
self.assertEqual(cu.logger.get_increment_counts(),
{'failures': 1})
self.assertEqual([0, 1],
pickle.load(open(op_path)).get('successes'))
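        # The remaining node finally gets a 201: the pending file is unlinked
        # and a success is counted.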
event = spawn(accept, [201])
cu.logger = FakeLogger()
cu.run_once()
err = event.wait()
if err:
raise err
self.assert_(not os.path.exists(op_path))
self.assertEqual(cu.logger.get_increment_counts(),
{'unlinks': 1, 'successes': 1})


if __name__ == '__main__':
unittest.main()