# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import os
import mock
from gzip import GzipFile
from shutil import rmtree
import cPickle as pickle
import time
import tempfile
from contextlib import contextmanager, closing
from errno import ENOENT, ENOTEMPTY, ENOTDIR

from eventlet.green import subprocess
from eventlet import Timeout, tpool

from test.unit import FakeLogger, debug_logger, patch_policies
from swift.common import utils
from swift.common.utils import hash_path, mkdirs, normalize_timestamp, \
    storage_directory
from swift.common import ring
from swift.obj import diskfile, replicator as object_replicator
from swift.common.storage_policy import StoragePolicy, POLICIES


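# Pin whataremyips() so the replicator only considers 127.0.0.0 local;
# every other device in the test rings is treated as a remote node.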
def _ips():
    return ['127.0.0.0']
object_replicator.whataremyips = _ips


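# Factory for a fake http_connect(): every connection returns a FakeConn
# whose getresponse() yields the canned status and a pickled empty dict body.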
def mock_http_connect(status):

    class FakeConn(object):

        def __init__(self, status, *args, **kwargs):
            self.status = status
            self.reason = 'Fake'
            self.host = args[0]
            self.port = args[1]
            self.method = args[4]
            self.path = args[5]
            self.with_exc = False
            self.headers = kwargs.get('headers', {})

        def getresponse(self):
            if self.with_exc:
                raise Exception('test')
            return self

        def getheader(self, header):
            return self.headers[header]

        def read(self, amt=None):
            return pickle.dumps({})

        def close(self):
            return
    return lambda *args, **kwargs: FakeConn(status, *args, **kwargs)

process_errors = []


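# Stand-in for subprocess.Popen: each construction pulls the next expected
# argument set from check_args and records any mismatch in process_errors.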
class MockProcess(object):
    ret_code = None
    ret_log = None
    check_args = None

    class Stream(object):

        def read(self):
            return MockProcess.ret_log.next()

    def __init__(self, *args, **kwargs):
        targs = MockProcess.check_args.next()
        for targ in targs:
            # Allow more than two candidate targs
            # (e.g. a case where either node is fine when nodes are shuffled)
            if isinstance(targ, tuple):
                allowed = False
                for target in targ:
                    if target in args[0]:
                        allowed = True
                if not allowed:
                    process_errors.append("Invalid: %s not in %s" % (targ,
                                                                     args))
            else:
                if targ not in args[0]:
                    process_errors.append("Invalid: %s not in %s" % (targ,
                                                                     args))
        self.stdout = self.Stream()

    def wait(self):
        return self.ret_code.next()


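# Context manager that swaps subprocess.Popen for MockProcess and feeds it
# the expected (return code, stdout, expected rsync args) triples, one per
# anticipated rsync invocation.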
@contextmanager
def _mock_process(ret):
    orig_process = subprocess.Popen
    MockProcess.ret_code = (i[0] for i in ret)
    MockProcess.ret_log = (i[1] for i in ret)
    MockProcess.check_args = (i[2] for i in ret)
    object_replicator.subprocess.Popen = MockProcess
    yield
    object_replicator.subprocess.Popen = orig_process


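# Write object.ring.gz and object-1.ring.gz under `path`; both rings share
# the same layout, and the default device list spans two regions.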
def _create_test_rings(path, devs=None):
    testgz = os.path.join(path, 'object.ring.gz')
    intended_replica2part2dev_id = [
        [0, 1, 2, 3, 4, 5, 6],
        [1, 2, 3, 0, 5, 6, 4],
        [2, 3, 0, 1, 6, 4, 5],
    ]
    intended_devs = devs or [
        {'id': 0, 'device': 'sda', 'zone': 0,
         'region': 1, 'ip': '127.0.0.0', 'port': 6000},
        {'id': 1, 'device': 'sda', 'zone': 1,
         'region': 2, 'ip': '127.0.0.1', 'port': 6000},
        {'id': 2, 'device': 'sda', 'zone': 2,
         'region': 1, 'ip': '127.0.0.2', 'port': 6000},
        {'id': 3, 'device': 'sda', 'zone': 4,
         'region': 2, 'ip': '127.0.0.3', 'port': 6000},
        {'id': 4, 'device': 'sda', 'zone': 5,
         'region': 1, 'ip': '127.0.0.4', 'port': 6000},
        {'id': 5, 'device': 'sda', 'zone': 6,
         'region': 2, 'ip': 'fe80::202:b3ff:fe1e:8329', 'port': 6000},
        {'id': 6, 'device': 'sda', 'zone': 7, 'region': 1,
         'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'port': 6000},
    ]
    intended_part_shift = 30
    with closing(GzipFile(testgz, 'wb')) as f:
        pickle.dump(
            ring.RingData(intended_replica2part2dev_id,
                          intended_devs, intended_part_shift),
            f)

    testgz = os.path.join(path, 'object-1.ring.gz')
    with closing(GzipFile(testgz, 'wb')) as f:
        pickle.dump(
            ring.RingData(intended_replica2part2dev_id,
                          intended_devs, intended_part_shift),
            f)
    for policy in POLICIES:
        policy.object_ring = None  # force reload
    return


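# The tests below run with two storage policies: policy 0 stores data under
# the 'objects' directory and policy 1 under 'objects-1'
# (see diskfile.get_data_dir).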
@patch_policies([StoragePolicy(0, 'zero', False),
                 StoragePolicy(1, 'one', True)])
class TestObjectReplicator(unittest.TestCase):

    def setUp(self):
        utils.HASH_PATH_SUFFIX = 'endcap'
        utils.HASH_PATH_PREFIX = ''
        # Setup a test ring (stolen from common/test_ring.py)
        self.testdir = tempfile.mkdtemp()
        self.devices = os.path.join(self.testdir, 'node')
        rmtree(self.testdir, ignore_errors=1)
        os.mkdir(self.testdir)
        os.mkdir(self.devices)
        os.mkdir(os.path.join(self.devices, 'sda'))
        self.objects = os.path.join(self.devices, 'sda',
                                    diskfile.get_data_dir(0))
        self.objects_1 = os.path.join(self.devices, 'sda',
                                      diskfile.get_data_dir(1))
        os.mkdir(self.objects)
        os.mkdir(self.objects_1)
        self.parts = {}
        self.parts_1 = {}
        for part in ['0', '1', '2', '3']:
            self.parts[part] = os.path.join(self.objects, part)
            os.mkdir(self.parts[part])
            self.parts_1[part] = os.path.join(self.objects_1, part)
            os.mkdir(self.parts_1[part])
        _create_test_rings(self.testdir)
        self.conf = dict(
            swift_dir=self.testdir, devices=self.devices, mount_check='false',
            timeout='300', stats_interval='1', sync_method='rsync')
        self.replicator = object_replicator.ObjectReplicator(self.conf)
        self.replicator.logger = FakeLogger()
        self.df_mgr = diskfile.DiskFileManager(self.conf,
                                               self.replicator.logger)

    def tearDown(self):
        rmtree(self.testdir, ignore_errors=1)

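    # The rsync-based tests share a pattern: write a single .data file into a
    # partition, build the rsync argument list expected for each remote node,
    # run the replicator under _mock_process, and check process_errors.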
    def test_run_once(self):
        conf = dict(swift_dir=self.testdir, devices=self.devices,
                    mount_check='false', timeout='300', stats_interval='1')
        replicator = object_replicator.ObjectReplicator(conf)
        was_connector = object_replicator.http_connect
        object_replicator.http_connect = mock_http_connect(200)
        cur_part = '0'
        df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
                                      policy_idx=0)
        mkdirs(df._datadir)
        f = open(os.path.join(df._datadir,
                              normalize_timestamp(time.time()) + '.data'),
                 'wb')
        f.write('1234567890')
        f.close()
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects, cur_part, data_dir)
        process_arg_checker = []
        ring = replicator.get_object_ring(0)
        nodes = [node for node in
                 ring.get_part_nodes(int(cur_part))
                 if node['ip'] not in _ips()]
        rsync_mods = tuple(['%s::object/sda/objects/%s' %
                            (node['ip'], cur_part) for node in nodes])
        for node in nodes:
            process_arg_checker.append(
                (0, '', ['rsync', whole_path_from, rsync_mods]))
        with _mock_process(process_arg_checker):
            replicator.run_once()
        self.assertFalse(process_errors)
        object_replicator.http_connect = was_connector

    # policy 1
    def test_run_once_1(self):
        conf = dict(swift_dir=self.testdir, devices=self.devices,
                    mount_check='false', timeout='300', stats_interval='1')
        replicator = object_replicator.ObjectReplicator(conf)
        was_connector = object_replicator.http_connect
        object_replicator.http_connect = mock_http_connect(200)
        cur_part = '0'
        df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
                                      policy_idx=1)
        mkdirs(df._datadir)
        f = open(os.path.join(df._datadir,
                              normalize_timestamp(time.time()) + '.data'),
                 'wb')
        f.write('1234567890')
        f.close()
        ohash = hash_path('a', 'c', 'o')
        data_dir = ohash[-3:]
        whole_path_from = os.path.join(self.objects_1, cur_part, data_dir)
        process_arg_checker = []
        ring = replicator.get_object_ring(1)
        nodes = [node for node in
                 ring.get_part_nodes(int(cur_part))
                 if node['ip'] not in _ips()]
        rsync_mods = tuple(['%s::object/sda/objects-1/%s' %
                            (node['ip'], cur_part) for node in nodes])
        for node in nodes:
            process_arg_checker.append(
                (0, '', ['rsync', whole_path_from, rsync_mods]))
        with _mock_process(process_arg_checker):
            replicator.run_once()
        self.assertFalse(process_errors)
        object_replicator.http_connect = was_connector

    def test_check_ring(self):
        for pol in POLICIES:
            obj_ring = self.replicator.get_object_ring(pol.idx)
            self.assertTrue(self.replicator.check_ring(obj_ring))
            orig_check = self.replicator.next_check
            self.replicator.next_check = orig_check - 30
            self.assertTrue(self.replicator.check_ring(obj_ring))
            self.replicator.next_check = orig_check
            orig_ring_time = obj_ring._mtime
            obj_ring._mtime = orig_ring_time - 30
            self.assertTrue(self.replicator.check_ring(obj_ring))
            self.replicator.next_check = orig_check - 30
            self.assertFalse(self.replicator.check_ring(obj_ring))

    def test_collect_jobs_mkdirs_error(self):

        def blowup_mkdirs(path):
            raise OSError('Ow!')

        with mock.patch.object(object_replicator, 'mkdirs', blowup_mkdirs):
            rmtree(self.objects, ignore_errors=1)
            object_replicator.mkdirs = blowup_mkdirs
            self.replicator.collect_jobs()
            self.assertTrue('exception' in self.replicator.logger.log_dict)
            self.assertEquals(
                len(self.replicator.logger.log_dict['exception']), 1)
            exc_args, exc_kwargs, exc_str = \
                self.replicator.logger.log_dict['exception'][0]
            self.assertEquals(len(exc_args), 1)
            self.assertTrue(exc_args[0].startswith('ERROR creating '))
            self.assertEquals(exc_kwargs, {})
            self.assertEquals(exc_str, 'Ow!')

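    # collect_jobs() yields one job per (policy, partition); the keys used
    # below concatenate the policy index and the partition name ('00'..'13').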
    def test_collect_jobs(self):
        jobs = self.replicator.collect_jobs()
        jobs_to_delete = [j for j in jobs if j['delete']]
        jobs_by_pol_part = {}
        for job in jobs:
            jobs_by_pol_part[str(job['policy_idx']) + job['partition']] = job
        self.assertEquals(len(jobs_to_delete), 2)
        self.assertEquals('1', jobs_to_delete[0]['partition'])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['00']['nodes']], [1, 2])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['01']['nodes']],
            [1, 2, 3])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['02']['nodes']], [2, 3])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['03']['nodes']], [3, 1])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['10']['nodes']], [1, 2])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['11']['nodes']],
            [1, 2, 3])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['12']['nodes']], [2, 3])
        self.assertEquals(
            [node['id'] for node in jobs_by_pol_part['13']['nodes']], [3, 1])
        for part in ['00', '01', '02', '03']:
            for node in jobs_by_pol_part[part]['nodes']:
                self.assertEquals(node['device'], 'sda')
            self.assertEquals(jobs_by_pol_part[part]['path'],
                              os.path.join(self.objects, part[1:]))
        for part in ['10', '11', '12', '13']:
            for node in jobs_by_pol_part[part]['nodes']:
                self.assertEquals(node['device'], 'sda')
            self.assertEquals(jobs_by_pol_part[part]['path'],
                              os.path.join(self.objects_1, part[1:]))

    def test_collect_jobs_handoffs_first(self):
        self.replicator.handoffs_first = True
        jobs = self.replicator.collect_jobs()
        self.assertTrue(jobs[0]['delete'])
        self.assertEquals('1', jobs[0]['partition'])

    def test_replicator_skips_bogus_partition_dirs(self):
        # A directory in the wrong place shouldn't crash the replicator
        rmtree(self.objects)
        rmtree(self.objects_1)
        os.mkdir(self.objects)
        os.mkdir(self.objects_1)

        os.mkdir(os.path.join(self.objects, "burrito"))
        jobs = self.replicator.collect_jobs()
        self.assertEqual(len(jobs), 0)

    def test_replicator_removes_zbf(self):
        # After running xfs_repair, a partition directory could become a
        # zero-byte file. If this happens, the replicator should clean it
        # up, log something, and move on to the next partition.

        # Surprise! Partition dir 1 is actually a zero-byte file.
        pol_0_part_1_path = os.path.join(self.objects, '1')
        rmtree(pol_0_part_1_path)
        with open(pol_0_part_1_path, 'w'):
            pass
        self.assertTrue(os.path.isfile(pol_0_part_1_path))  # sanity check

        # Policy 1's partition dir 1 is also a zero-byte file.
        pol_1_part_1_path = os.path.join(self.objects_1, '1')
        rmtree(pol_1_part_1_path)
        with open(pol_1_part_1_path, 'w'):
            pass
        self.assertTrue(os.path.isfile(pol_1_part_1_path))  # sanity check

        # Don't delete things in collect_jobs(); all the stat() calls would
        # make replicator startup really slow.
        self.replicator.collect_jobs()
        self.assertTrue(os.path.exists(pol_0_part_1_path))
        self.assertTrue(os.path.exists(pol_1_part_1_path))

        # After a replication pass, the files should be gone
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            self.replicator.run_once()

        self.assertFalse(os.path.exists(pol_0_part_1_path))
        self.assertFalse(os.path.exists(pol_1_part_1_path))

        logged_warnings = sorted(self.replicator.logger.log_dict['warning'])
        self.assertEquals(
            (('Removing partition directory which was a file: %s',
              pol_1_part_1_path), {}), logged_warnings[0])
        self.assertEquals(
            (('Removing partition directory which was a file: %s',
              pol_0_part_1_path), {}), logged_warnings[1])

    def test_delete_partition(self):
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            data_dir = ohash[-3:]
            whole_path_from = os.path.join(self.objects, '1', data_dir)
            part_path = os.path.join(self.objects, '1')
            self.assertTrue(os.access(part_path, os.F_OK))
            ring = self.replicator.get_object_ring(0)
            nodes = [node for node in
                     ring.get_part_nodes(1)
                     if node['ip'] not in _ips()]
            process_arg_checker = []
            for node in nodes:
                rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
                process_arg_checker.append(
                    (0, '', ['rsync', whole_path_from, rsync_mod]))
            with _mock_process(process_arg_checker):
                self.replicator.replicate()
            self.assertFalse(os.access(part_path, os.F_OK))

    def test_delete_partition_default_sync_method(self):
        self.replicator.conf.pop('sync_method')
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            data_dir = ohash[-3:]
            whole_path_from = os.path.join(self.objects, '1', data_dir)
            part_path = os.path.join(self.objects, '1')
            self.assertTrue(os.access(part_path, os.F_OK))
            ring = self.replicator.get_object_ring(0)
            nodes = [node for node in
                     ring.get_part_nodes(1)
                     if node['ip'] not in _ips()]
            process_arg_checker = []
            for node in nodes:
                rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
                process_arg_checker.append(
                    (0, '', ['rsync', whole_path_from, rsync_mod]))
            with _mock_process(process_arg_checker):
                self.replicator.replicate()
            self.assertFalse(os.access(part_path, os.F_OK))

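    # With every device in a single region and sync_method set to 'ssync',
    # a successful handoff sync should remove the object hash dir, its suffix
    # dir, and finally the whole handoff partition.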
    def test_delete_partition_ssync_single_region(self):
        devs = [
            {'id': 0, 'device': 'sda', 'zone': 0,
             'region': 1, 'ip': '127.0.0.0', 'port': 6000},
            {'id': 1, 'device': 'sda', 'zone': 1,
             'region': 1, 'ip': '127.0.0.1', 'port': 6000},
            {'id': 2, 'device': 'sda', 'zone': 2,
             'region': 1, 'ip': '127.0.0.2', 'port': 6000},
            {'id': 3, 'device': 'sda', 'zone': 4,
             'region': 1, 'ip': '127.0.0.3', 'port': 6000},
            {'id': 4, 'device': 'sda', 'zone': 5,
             'region': 1, 'ip': '127.0.0.4', 'port': 6000},
            {'id': 5, 'device': 'sda', 'zone': 6,
             'region': 1, 'ip': 'fe80::202:b3ff:fe1e:8329', 'port': 6000},
            {'id': 6, 'device': 'sda', 'zone': 7, 'region': 1,
             'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'port': 6000},
        ]
        _create_test_rings(self.testdir, devs=devs)
        self.conf['sync_method'] = 'ssync'
        self.replicator = object_replicator.ObjectReplicator(self.conf)
        self.replicator.logger = debug_logger()

        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            whole_path_from = storage_directory(self.objects, 1, ohash)
            suffix_dir_path = os.path.dirname(whole_path_from)
            part_path = os.path.join(self.objects, '1')
            self.assertTrue(os.access(part_path, os.F_OK))

            def _fake_ssync(node, job, suffixes, **kwargs):
                return True, set([ohash])

            self.replicator.sync_method = _fake_ssync
            self.replicator.replicate()
            self.assertFalse(os.access(whole_path_from, os.F_OK))
            self.assertFalse(os.access(suffix_dir_path, os.F_OK))
            self.assertFalse(os.access(part_path, os.F_OK))

    def test_delete_partition_1(self):
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
                                          policy_idx=1)
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            data_dir = ohash[-3:]
            whole_path_from = os.path.join(self.objects_1, '1', data_dir)
            part_path = os.path.join(self.objects_1, '1')
            self.assertTrue(os.access(part_path, os.F_OK))
            ring = self.replicator.get_object_ring(1)
            nodes = [node for node in
                     ring.get_part_nodes(1)
                     if node['ip'] not in _ips()]
            process_arg_checker = []
            for node in nodes:
                rsync_mod = '%s::object/sda/objects-1/%s' % (node['ip'], 1)
                process_arg_checker.append(
                    (0, '', ['rsync', whole_path_from, rsync_mod]))
            with _mock_process(process_arg_checker):
                self.replicator.replicate()
            self.assertFalse(os.access(part_path, os.F_OK))

    def test_delete_partition_with_failures(self):
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            data_dir = ohash[-3:]
            whole_path_from = os.path.join(self.objects, '1', data_dir)
            part_path = os.path.join(self.objects, '1')
            self.assertTrue(os.access(part_path, os.F_OK))
            ring = self.replicator.get_object_ring(0)
            nodes = [node for node in
                     ring.get_part_nodes(1)
                     if node['ip'] not in _ips()]
            process_arg_checker = []
            for i, node in enumerate(nodes):
                rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
                if i == 0:
                    # force one of the rsync calls to fail
                    ret_code = 1
                else:
                    ret_code = 0
                process_arg_checker.append(
                    (ret_code, '', ['rsync', whole_path_from, rsync_mod]))
            with _mock_process(process_arg_checker):
                self.replicator.replicate()
            # The path should still exist
            self.assertTrue(os.access(part_path, os.F_OK))

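    # handoff_delete = 2: the handoff partition may be removed as soon as two
    # remote nodes have been synced successfully, even if another node fails.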
    def test_delete_partition_with_handoff_delete(self):
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            self.replicator.handoff_delete = 2
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            data_dir = ohash[-3:]
            whole_path_from = os.path.join(self.objects, '1', data_dir)
            part_path = os.path.join(self.objects, '1')
            self.assertTrue(os.access(part_path, os.F_OK))
            ring = self.replicator.get_object_ring(0)
            nodes = [node for node in
                     ring.get_part_nodes(1)
                     if node['ip'] not in _ips()]
            process_arg_checker = []
            for i, node in enumerate(nodes):
                rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
                if i == 0:
                    # force one of the rsync calls to fail
                    ret_code = 1
                else:
                    ret_code = 0
                process_arg_checker.append(
                    (ret_code, '', ['rsync', whole_path_from, rsync_mod]))
            with _mock_process(process_arg_checker):
                self.replicator.replicate()
            self.assertFalse(os.access(part_path, os.F_OK))

    def test_delete_partition_with_handoff_delete_failures(self):
        with mock.patch('swift.obj.replicator.http_connect',
                        mock_http_connect(200)):
            self.replicator.handoff_delete = 2
            df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
            mkdirs(df._datadir)
            f = open(os.path.join(df._datadir,
                                  normalize_timestamp(time.time()) + '.data'),
                     'wb')
            f.write('1234567890')
            f.close()
            ohash = hash_path('a', 'c', 'o')
            data_dir = ohash[-3:]
            whole_path_from = os.path.join(self.objects, '1', data_dir)
            part_path = os.path.join(self.objects, '1')
            self.assertTrue(os.access(part_path, os.F_OK))
            ring = self.replicator.get_object_ring(0)
            nodes = [node for node in
                     ring.get_part_nodes(1)
                     if node['ip'] not in _ips()]
            process_arg_checker = []
            for i, node in enumerate(nodes):
                rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
                if i in (0, 1):
                    # force two of the rsync calls to fail
                    ret_code = 1
                else:
                    ret_code = 0
                process_arg_checker.append(
                    (ret_code, '', ['rsync', whole_path_from, rsync_mod]))
            with _mock_process(process_arg_checker):
                self.replicator.replicate()
            # The file should still exist
            self.assertTrue(os.access(part_path, os.F_OK))

def test_delete_partition_with_handoff_delete_fail_in_other_region(self):
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('1234567890')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
data_dir = ohash[-3:]
|
|
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
|
|
part_path = os.path.join(self.objects, '1')
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
ring = self.replicator.get_object_ring(0)
|
|
|
|
nodes = [node for node in
|
|
|
|
ring.get_part_nodes(1)
|
|
|
|
if node['ip'] not in _ips()]
|
|
|
|
process_arg_checker = []
|
|
|
|
for node in nodes:
|
|
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
|
|
if node['region'] != 1:
|
|
|
|
# force the rsync calls for the other region to fail
|
|
|
|
ret_code = 1
|
|
|
|
else:
|
|
|
|
ret_code = 0
|
|
|
|
process_arg_checker.append(
|
|
|
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
|
|
|
with _mock_process(process_arg_checker):
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
2012-09-28 12:24:15 -07:00
|
|
|
def test_delete_partition_override_params(self):
|
2013-09-12 19:51:18 -04:00
|
|
|
df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
2012-09-28 12:24:15 -07:00
|
|
|
part_path = os.path.join(self.objects, '1')
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate(override_devices=['sdb'])
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate(override_partitions=['9'])
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate(override_devices=['sda'],
|
|
|
|
override_partitions=['1'])
|
|
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
|
2015-01-22 15:26:19 -08:00
|
|
|
def test_delete_policy_override_params(self):
|
|
|
|
df0 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o')
|
|
|
|
df1 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o',
|
|
|
|
policy_idx=1)
|
|
|
|
mkdirs(df0._datadir)
|
|
|
|
mkdirs(df1._datadir)
|
|
|
|
|
|
|
|
pol0_part_path = os.path.join(self.objects, '99')
|
|
|
|
pol1_part_path = os.path.join(self.objects_1, '99')
|
|
|
|
|
|
|
|
# sanity checks
|
|
|
|
self.assertTrue(os.access(pol0_part_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(pol1_part_path, os.F_OK))
|
|
|
|
|
|
|
|
# a bogus policy index doesn't bother the replicator any more than a
|
|
|
|
# bogus device or partition does
|
|
|
|
self.replicator.run_once(policies='1,2,5')
|
|
|
|
|
|
|
|
self.assertFalse(os.access(pol1_part_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(pol0_part_path, os.F_OK))
|
|
|
|
|
2014-05-29 00:54:07 -07:00
|
|
|
def test_delete_partition_ssync(self):
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('0')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
|
|
part_path = os.path.join(self.objects, '1')
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
self.call_nums = 0
|
|
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
|
|
|
|
def _fake_ssync(node, job, suffixes, **kwargs):
|
|
|
|
success = True
|
|
|
|
ret_val = [whole_path_from]
|
|
|
|
if self.call_nums == 2:
|
|
|
|
# ssync should return (True, []) only when the second
|
|
|
|
# candidate node has not received the replica yet.
|
|
|
|
success = False
|
|
|
|
ret_val = []
|
|
|
|
self.call_nums += 1
|
|
|
|
return success, set(ret_val)
|
|
|
|
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should be deleted at the second replicate call
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The partition should be deleted at the third replicate call
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
del self.call_nums
|
|
|
|
|
|
|
|
def test_delete_partition_ssync_with_sync_failure(self):
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('0')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
|
|
part_path = os.path.join(self.objects, '1')
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.call_nums = 0
|
|
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
|
2015-02-12 16:15:42 -08:00
|
|
|
def _fake_ssync(node, job, suffixes, **kwags):
|
2014-05-29 00:54:07 -07:00
|
|
|
success = False
|
|
|
|
ret_val = []
|
|
|
|
if self.call_nums == 2:
|
|
|
|
# ssync should return (True, []) only when the second
|
|
|
|
# candidate node has not received the replica yet.
|
|
|
|
success = True
|
|
|
|
ret_val = [whole_path_from]
|
|
|
|
self.call_nums += 1
|
|
|
|
return success, set(ret_val)
|
|
|
|
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
del self.call_nums
|
|
|
|
|
2015-02-12 16:18:54 -08:00
|
|
|
def test_delete_objs_ssync_only_when_in_sync(self):
|
|
|
|
self.replicator.logger = debug_logger('test-replicator')
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('0')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
|
|
part_path = os.path.join(self.objects, '1')
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.call_nums = 0
|
|
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
|
|
|
|
in_sync_objs = []
|
|
|
|
|
|
|
|
def _fake_ssync(node, job, suffixes, remote_check_objs=None):
|
|
|
|
self.call_nums += 1
|
|
|
|
if remote_check_objs is None:
|
|
|
|
# sync job
|
|
|
|
ret_val = [whole_path_from]
|
|
|
|
else:
|
|
|
|
ret_val = in_sync_objs
|
|
|
|
return True, set(ret_val)
|
|
|
|
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
|
|
self.replicator.replicate()
|
|
|
|
self.assertEqual(3, self.call_nums)
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
del self.call_nums
|
|
|
|
|
2014-05-29 00:54:07 -07:00
|
|
|
def test_delete_partition_ssync_with_cleanup_failure(self):
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
self.replicator.logger = mock_logger = mock.MagicMock()
|
|
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('0')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
|
|
part_path = os.path.join(self.objects, '1')
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
self.call_nums = 0
|
|
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
|
|
|
|
def _fake_ssync(node, job, suffixes, **kwargs):
|
|
|
|
success = True
|
|
|
|
ret_val = [whole_path_from]
|
|
|
|
if self.call_nums == 2:
|
|
|
|
# ssync should return (True, []) only when the second
|
|
|
|
# candidate node has not received the replica yet.
|
|
|
|
success = False
|
|
|
|
ret_val = []
|
|
|
|
self.call_nums += 1
|
|
|
|
return success, set(ret_val)
|
|
|
|
|
|
|
|
rmdir_func = os.rmdir
|
|
|
|
|
|
|
|
def raise_exception_rmdir(exception_class, error_no):
|
|
|
|
instance = exception_class()
|
|
|
|
instance.errno = error_no
|
|
|
|
|
|
|
|
def func(directory):
|
|
|
|
if directory == suffix_dir_path:
|
|
|
|
raise instance
|
|
|
|
else:
|
|
|
|
rmdir_func(directory)
|
|
|
|
|
|
|
|
return func
|
|
|
|
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
|
|
self.replicator.replicate()
|
|
|
|
# The file should still exist
|
|
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
# Fail with ENOENT
|
|
|
|
with mock.patch('os.rmdir',
|
|
|
|
raise_exception_rmdir(OSError, ENOENT)):
|
|
|
|
self.replicator.replicate()
|
|
|
|
self.assertEquals(mock_logger.exception.call_count, 0)
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
# Fail with ENOTEMPTY
|
|
|
|
with mock.patch('os.rmdir',
|
|
|
|
raise_exception_rmdir(OSError, ENOTEMPTY)):
|
|
|
|
self.replicator.replicate()
|
|
|
|
self.assertEquals(mock_logger.exception.call_count, 0)
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
# Fail with ENOTDIR
|
|
|
|
with mock.patch('os.rmdir',
|
|
|
|
raise_exception_rmdir(OSError, ENOTDIR)):
|
|
|
|
self.replicator.replicate()
|
|
|
|
self.assertEquals(mock_logger.exception.call_count, 1)
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
|
|
|
|
# Finally we can cleanup everything
|
|
|
|
self.replicator.replicate()
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.replicator.replicate()
|
|
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
|
2010-11-05 09:15:31 -07:00
|
|
|
def test_run_once_recover_from_failure(self):
|
2014-03-18 10:50:17 -07:00
|
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
|
|
replicator = object_replicator.ObjectReplicator(conf)
|
2010-11-05 09:15:31 -07:00
|
|
|
was_connector = object_replicator.http_connect
|
2011-11-28 09:13:41 -08:00
|
|
|
try:
|
|
|
|
object_replicator.http_connect = mock_http_connect(200)
|
|
|
|
# Write some files into '1' and run replicate - they should be moved
|
2014-09-18 21:16:35 -07:00
|
|
|
# to the other partitions and then the node should get deleted.
|
2011-11-28 09:13:41 -08:00
|
|
|
cur_part = '1'
|
2013-09-12 19:51:18 -04:00
|
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
2011-11-28 09:13:41 -08:00
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('1234567890')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
data_dir = ohash[-3:]
|
|
|
|
whole_path_from = os.path.join(self.objects, cur_part, data_dir)
|
2014-03-18 10:50:17 -07:00
|
|
|
ring = replicator.get_object_ring(0)
|
2011-11-28 09:13:41 -08:00
|
|
|
process_arg_checker = []
|
|
|
|
nodes = [node for node in
|
2014-03-18 10:50:17 -07:00
|
|
|
ring.get_part_nodes(int(cur_part))
|
2013-08-31 23:42:43 -04:00
|
|
|
if node['ip'] not in _ips()]
|
2011-11-28 09:13:41 -08:00
|
|
|
for node in nodes:
|
2012-06-04 13:27:39 +02:00
|
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'],
|
|
|
|
cur_part)
|
2011-11-28 09:13:41 -08:00
|
|
|
process_arg_checker.append(
|
|
|
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
|
|
|
self.assertTrue(os.access(os.path.join(self.objects,
|
|
|
|
'1', data_dir, ohash),
|
|
|
|
os.F_OK))
|
|
|
|
with _mock_process(process_arg_checker):
|
|
|
|
replicator.run_once()
|
|
|
|
self.assertFalse(process_errors)
|
|
|
|
for i, result in [('0', True), ('1', False),
|
|
|
|
('2', True), ('3', True)]:
|
|
|
|
self.assertEquals(os.access(
|
2013-08-31 23:42:43 -04:00
|
|
|
os.path.join(self.objects,
|
|
|
|
i, diskfile.HASH_FILE),
|
|
|
|
os.F_OK), result)
|
2011-11-28 09:13:41 -08:00
|
|
|
finally:
|
|
|
|
object_replicator.http_connect = was_connector
|
|
|
|
|
|
|
|
def test_run_once_recover_from_timeout(self):
|
2014-03-18 10:50:17 -07:00
|
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
|
|
replicator = object_replicator.ObjectReplicator(conf)
|
2011-11-28 09:13:41 -08:00
|
|
|
was_connector = object_replicator.http_connect
|
|
|
|
was_get_hashes = object_replicator.get_hashes
|
|
|
|
was_execute = tpool.execute
|
|
|
|
self.get_hash_count = 0
|
|
|
|
try:
|
|
|
|
|
|
|
|
def fake_get_hashes(*args, **kwargs):
|
|
|
|
self.get_hash_count += 1
|
|
|
|
if self.get_hash_count == 3:
|
|
|
|
# raise timeout on last call to get hashes
|
|
|
|
raise Timeout()
|
|
|
|
return 2, {'abc': 'def'}
|
|
|
|
|
|
|
|
def fake_exc(tester, *args, **kwargs):
|
|
|
|
if 'Error syncing partition' in args[0]:
|
|
|
|
tester.i_failed = True
|
|
|
|
|
|
|
|
self.i_failed = False
|
|
|
|
object_replicator.http_connect = mock_http_connect(200)
|
|
|
|
object_replicator.get_hashes = fake_get_hashes
|
|
|
|
replicator.logger.exception = \
|
|
|
|
lambda *args, **kwargs: fake_exc(self, *args, **kwargs)
|
|
|
|
# Write some files into '1' and run replicate - they should be moved
|
As-unique-as-possible partition replica placement.
This commit introduces a new algorithm for assigning partition
replicas to devices. Basically, the ring builder organizes the devices
into tiers (first zone, then IP/port, then device ID). When placing a
replica, the ring builder looks for the emptiest device (biggest
parts_wanted) in the furthest-away tier.
In the case where zone-count >= replica-count, the new algorithm will
give the same results as the one it replaces. Thus, no migration is
needed.
In the case where zone-count < replica-count, the new algorithm
behaves differently from the old algorithm. The new algorithm will
distribute things evenly at each tier so that the replication is as
high-quality as possible, given the circumstances. The old algorithm
would just crash, so again, no migration is needed.
Handoffs have also been updated to use the new algorithm. When
generating handoff nodes, first the ring looks for nodes in other
zones, then other ips/ports, then any other drive. The first handoff
nodes (the ones in other zones) will be the same as before; this
commit just extends the list of handoff nodes.
The proxy server and replicators have been altered to avoid looking at
the ring's replica count directly. Previously, with a replica count of
C, RingData.get_nodes() and RingData.get_part_nodes() would return
lists of length C, so some other code used the replica count when it
needed the number of nodes. If two of a partition's replicas are on
the same device (e.g. with 3 replicas, 2 devices), then that
assumption is no longer true. Fortunately, all the proxy server and
replicators really needed was the number of nodes returned, which they
already had. (Bonus: now the only code that mentions replica_count
directly is in the ring and the ring builder.)
Change-Id: Iba2929edfc6ece89791890d0635d4763d821a3aa
2012-04-23 10:41:44 -07:00
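A tiny illustration of the point above about counting the nodes the ring
actually returns instead of reading the replica count; this snippet is
hypothetical and not part of this test, and assumes ring files exist under
/etc/swift:

    from swift.common.ring import Ring

    object_ring = Ring('/etc/swift', ring_name='object')
    part, nodes = object_ring.get_nodes('a', 'c', 'o')

    # Loop over what the ring returned rather than assuming replica_count
    # entries; with fewer devices than replicas the two can differ.
    for node in nodes:
        print '%(ip)s:%(port)s/%(device)s' % node

    # Handoffs extend the primaries: other zones first, then other
    # ip/ports, then any remaining device.
    for handoff in object_ring.get_more_nodes(part):
        break   # just demonstrating the generator; real code would sync to it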
|
|
|
# to the other partitions and then the node should get deleted.
|
2011-11-28 09:13:41 -08:00
|
|
|
cur_part = '1'
|
2013-09-12 19:51:18 -04:00
|
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o')
|
|
|
|
mkdirs(df._datadir)
|
|
|
|
f = open(os.path.join(df._datadir,
|
2011-11-28 09:13:41 -08:00
|
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
|
|
'wb')
|
|
|
|
f.write('1234567890')
|
|
|
|
f.close()
|
|
|
|
ohash = hash_path('a', 'c', 'o')
|
|
|
|
data_dir = ohash[-3:]
|
|
|
|
whole_path_from = os.path.join(self.objects, cur_part, data_dir)
|
|
|
|
process_arg_checker = []
|
2014-03-18 10:50:17 -07:00
|
|
|
ring = replicator.get_object_ring(0)
|
2011-11-28 09:13:41 -08:00
|
|
|
nodes = [node for node in
|
2014-03-18 10:50:17 -07:00
|
|
|
ring.get_part_nodes(int(cur_part))
|
2013-08-31 23:42:43 -04:00
|
|
|
if node['ip'] not in _ips()]
|
2011-11-28 09:13:41 -08:00
|
|
|
for node in nodes:
|
2012-06-04 13:27:39 +02:00
|
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'],
|
|
|
|
cur_part)
|
2011-11-28 09:13:41 -08:00
|
|
|
process_arg_checker.append(
|
|
|
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
|
|
|
self.assertTrue(os.access(os.path.join(self.objects,
|
|
|
|
'1', data_dir, ohash),
|
|
|
|
os.F_OK))
|
|
|
|
with _mock_process(process_arg_checker):
|
|
|
|
replicator.run_once()
|
|
|
|
self.assertFalse(process_errors)
|
|
|
|
self.assertFalse(self.i_failed)
|
|
|
|
finally:
|
|
|
|
object_replicator.http_connect = was_connector
|
|
|
|
object_replicator.get_hashes = was_get_hashes
|
|
|
|
tpool.execute = was_execute
|
2010-07-12 17:03:45 -05:00
|
|
|
|
|
|
|
def test_run(self):
|
2010-11-16 08:32:03 -08:00
|
|
|
with _mock_process([(0, '')] * 100):
|
2013-08-05 15:35:34 -07:00
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
self.replicator.replicate()
|
2010-07-12 17:03:45 -05:00
|
|
|
|
|
|
|
def test_run_withlog(self):
|
2010-11-16 08:32:03 -08:00
|
|
|
with _mock_process([(0, "stuff in log")] * 100):
|
2013-08-05 15:35:34 -07:00
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
|
|
mock_http_connect(200)):
|
|
|
|
self.replicator.replicate()
|
2010-07-12 17:03:45 -05:00
|
|
|
|
Object replication ssync (an rsync alternative)
For this commit, ssync is just a direct replacement for how
we use rsync. Assuming we switch over to ssync completely
someday and drop rsync, we will then be able to improve the
algorithms even further (removing local objects as we
successfully transfer each one rather than waiting for whole
partitions, using an index.db with hash-trees, etc., etc.)
For easier review, this commit can be thought of in distinct
parts:
1) New global_conf_callback functionality for allowing
services to perform setup code before workers, etc. are
launched. (This is then used by ssync in the object
server to create a cross-worker semaphore to restrict
concurrent incoming replication.)
2) A bit of shifting of items up from object server and
replicator to diskfile or DEFAULT conf sections for
better sharing of the same settings. conn_timeout,
node_timeout, client_timeout, network_chunk_size,
disk_chunk_size.
3) Modifications to the object server and replicator to
optionally use ssync in place of rsync. This is done in
a generic enough way that switching to FutureSync should
be easy someday.
4) The biggest part, and (at least for now) completely
optional part, are the new ssync_sender and
ssync_receiver files. Nice and isolated for easier
testing and visibility into test coverage, etc.
All the usual logging, statsd, recon, etc. instrumentation
is still there when using ssync, just as it is when using
rsync.
Beyond the essential error and exceptional condition
logging, I have not added any additional instrumentation at
this time. Unless there is something someone finds super
pressing to have added to the logging, I think such
additions would be better as separate change reviews.
FOR NOW, IT IS NOT RECOMMENDED TO USE SSYNC ON PRODUCTION
CLUSTERS. Some of us will be using it in a limited fashion to look
for any subtle issues, tuning, etc. but generally ssync is
an experimental feature. In its current implementation it is
probably going to be a bit slower than rsync, but if all
goes according to plan it will end up much faster.
There are no comparisons yet between ssync and rsync other
than some raw virtual machine testing I've done to show it
should compete well enough once we can put it in use in the
real world.
If you Tweet, Google+, or whatever, be sure to indicate it's
experimental. It'd be best to keep it out of deployment
guides, howtos, etc. until we all figure out if we like it,
find it to be stable, etc.
Change-Id: If003dcc6f4109e2d2a42f4873a0779110fff16d6
2013-08-28 16:10:43 +00:00
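A small sketch of the sync_method hook the tests around here exercise; only
the conf-driven selection and the (success, in-sync-objects) return shape are
taken from the surrounding code, the rest (paths, the empty set) is
illustrative:

    from swift.obj import replicator as object_replicator

    conf = dict(swift_dir='/etc/swift',      # must contain the object ring files
                devices='/srv/node',
                mount_check='false',
                sync_method='ssync')         # picks the ssync path instead of rsync
    replicator = object_replicator.ObjectReplicator(conf)

    def _fake_ssync(node, job, suffixes, **kwargs):
        # report success plus the set of objects found in sync on the remote
        # end (empty here purely for illustration)
        return True, set()

    replicator.sync_method = _fake_ssync     # the tests swap in fakes like this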
|
|
|
def test_sync_just_calls_sync_method(self):
|
|
|
|
self.replicator.sync_method = mock.MagicMock()
|
|
|
|
self.replicator.sync('node', 'job', 'suffixes')
|
|
|
|
self.replicator.sync_method.assert_called_once_with(
|
|
|
|
'node', 'job', 'suffixes')
|
|
|
|
|
2012-12-17 06:39:25 -05:00
|
|
|
@mock.patch('swift.obj.replicator.tpool_reraise', autospec=True)
|
|
|
|
@mock.patch('swift.obj.replicator.http_connect', autospec=True)
|
|
|
|
def test_update(self, mock_http, mock_tpool_reraise):
|
|
|
|
|
|
|
|
def set_default(self):
|
|
|
|
self.replicator.suffix_count = 0
|
|
|
|
self.replicator.suffix_sync = 0
|
|
|
|
self.replicator.suffix_hash = 0
|
|
|
|
self.replicator.replication_count = 0
|
|
|
|
self.replicator.partition_times = []
|
|
|
|
|
|
|
|
self.headers = {'Content-Length': '0',
|
Object services user-agent string uses full name
Aside from the user-agent string, the strings "obj-server",
"obj-updater", and "obj-replicator" (or "obj-<anything>"*) do not
appear in the swift code base, other than the directory containing the
object services code being named "obj".
Furthermore, the container, account, and proxy services construct their
user-agent string, as reported in the logs, using their full name. In
addition, this full name also shows up as the name of the process via
"ps" or "top", etc., which can make it easier for admins to match log
entries with other tools.
For consistency, we update the object services to use an "object-"
prefix rather than "obj-" in its user agent string.
* obj-etag does appear in a unit test, but not part of the regular
code.
Change-Id: I914fc189514207df2535731eda10cb4b3d30cc6c
2014-06-23 12:59:24 -07:00
|
|
|
'user-agent': 'object-replicator %s' % os.getpid()}
|
2012-12-17 06:39:25 -05:00
|
|
|
self.replicator.logger = mock_logger = mock.MagicMock()
|
|
|
|
mock_tpool_reraise.return_value = (0, {})
|
|
|
|
|
|
|
|
all_jobs = self.replicator.collect_jobs()
|
|
|
|
jobs = [job for job in all_jobs if not job['delete']]
|
|
|
|
|
|
|
|
mock_http.return_value = answer = mock.MagicMock()
|
|
|
|
answer.getresponse.return_value = resp = mock.MagicMock()
|
|
|
|
# Check incorrect http_connect with status 507 and
|
|
|
|
# count of attempts and call args
|
|
|
|
resp.status = 507
|
|
|
|
error = '%(ip)s/%(device)s responded as unmounted'
|
|
|
|
expect = 'Error syncing partition'
|
|
|
|
for job in jobs:
|
|
|
|
set_default(self)
|
2014-03-18 10:50:17 -07:00
|
|
|
ring = self.replicator.get_object_ring(job['policy_idx'])
|
2014-06-23 12:52:50 -07:00
|
|
|
self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx']
|
2012-12-17 06:39:25 -05:00
|
|
|
self.replicator.update(job)
|
|
|
|
self.assertTrue(error in mock_logger.error.call_args[0][0])
|
|
|
|
self.assertTrue(expect in mock_logger.exception.call_args[0][0])
|
|
|
|
self.assertEquals(len(self.replicator.partition_times), 1)
|
2014-03-18 10:50:17 -07:00
|
|
|
self.assertEquals(mock_http.call_count, len(ring._devs) - 1)
|
2012-12-17 06:39:25 -05:00
|
|
|
reqs = []
|
|
|
|
for node in job['nodes']:
|
2013-08-31 23:42:43 -04:00
|
|
|
reqs.append(mock.call(node['ip'], node['port'], node['device'],
|
|
|
|
job['partition'], 'REPLICATE', '',
|
|
|
|
headers=self.headers))
|
2012-12-17 06:39:25 -05:00
|
|
|
if job['partition'] == '0':
|
|
|
|
self.assertEquals(self.replicator.suffix_hash, 0)
|
|
|
|
mock_http.assert_has_calls(reqs, any_order=True)
|
|
|
|
mock_http.reset_mock()
|
|
|
|
mock_logger.reset_mock()
|
|
|
|
|
|
|
|
# Check incorrect http_connect with status 400 != HTTP_OK
|
|
|
|
resp.status = 400
|
|
|
|
error = 'Invalid response %(resp)s from %(ip)s'
|
|
|
|
for job in jobs:
|
|
|
|
set_default(self)
|
|
|
|
self.replicator.update(job)
|
|
|
|
self.assertTrue(error in mock_logger.error.call_args[0][0])
|
|
|
|
self.assertEquals(len(self.replicator.partition_times), 1)
|
|
|
|
mock_logger.reset_mock()
|
|
|
|
|
|
|
|
# Check successful http_connect and exception with
|
|
|
|
# incorrect pickle.loads(resp.read())
|
|
|
|
resp.status = 200
|
|
|
|
expect = 'Error syncing with node:'
|
|
|
|
for job in jobs:
|
|
|
|
set_default(self)
|
|
|
|
self.replicator.update(job)
|
|
|
|
self.assertTrue(expect in mock_logger.exception.call_args[0][0])
|
|
|
|
self.assertEquals(len(self.replicator.partition_times), 1)
|
|
|
|
mock_logger.reset_mock()
|
|
|
|
|
|
|
|
# Check successful http_connect and correct
|
|
|
|
# pickle.loads(resp.read()) for non local node
|
|
|
|
resp.status = 200
|
|
|
|
local_job = None
|
|
|
|
resp.read.return_value = pickle.dumps({})
|
|
|
|
for job in jobs:
|
|
|
|
set_default(self)
|
2014-09-18 21:16:35 -07:00
|
|
|
# limit local job to policy 0 for simplicity
|
2014-03-18 10:50:17 -07:00
|
|
|
if job['partition'] == '0' and job['policy_idx'] == 0:
|
2012-12-17 06:39:25 -05:00
|
|
|
local_job = job.copy()
|
|
|
|
continue
|
|
|
|
self.replicator.update(job)
|
|
|
|
self.assertEquals(mock_logger.exception.call_count, 0)
|
|
|
|
self.assertEquals(mock_logger.error.call_count, 0)
|
|
|
|
self.assertEquals(len(self.replicator.partition_times), 1)
|
|
|
|
self.assertEquals(self.replicator.suffix_hash, 0)
|
|
|
|
self.assertEquals(self.replicator.suffix_sync, 0)
|
|
|
|
self.assertEquals(self.replicator.suffix_count, 0)
|
|
|
|
mock_logger.reset_mock()
|
|
|
|
|
2013-08-28 16:10:43 +00:00
|
|
|
# Check successful http_connect and sync for local node
|
2012-12-17 06:39:25 -05:00
|
|
|
mock_tpool_reraise.return_value = (1, {'a83': 'ba47fd314242ec8c'
|
|
|
|
'7efb91f5d57336e4'})
|
|
|
|
resp.read.return_value = pickle.dumps({'a83': 'c130a2c17ed45102a'
|
|
|
|
'ada0f4eee69494ff'})
|
|
|
|
set_default(self)
|
2014-05-29 00:54:07 -07:00
|
|
|
self.replicator.sync = fake_func = \
|
|
|
|
mock.MagicMock(return_value=(True, []))
|
2012-12-17 06:39:25 -05:00
|
|
|
self.replicator.update(local_job)
|
|
|
|
reqs = []
|
|
|
|
for node in local_job['nodes']:
|
2013-07-22 15:27:54 -07:00
|
|
|
reqs.append(mock.call(node, local_job, ['a83']))
|
2012-12-17 06:39:25 -05:00
|
|
|
fake_func.assert_has_calls(reqs, any_order=True)
|
|
|
|
self.assertEquals(fake_func.call_count, 2)
|
|
|
|
self.assertEquals(self.replicator.replication_count, 1)
|
|
|
|
self.assertEquals(self.replicator.suffix_sync, 2)
|
|
|
|
self.assertEquals(self.replicator.suffix_hash, 1)
|
|
|
|
self.assertEquals(self.replicator.suffix_count, 1)
|
2014-05-29 00:54:07 -07:00
|
|
|
|
|
|
|
# Efficient Replication Case
|
|
|
|
set_default(self)
|
|
|
|
self.replicator.sync = fake_func = \
|
|
|
|
mock.MagicMock(return_value=(True, []))
|
|
|
|
all_jobs = self.replicator.collect_jobs()
|
|
|
|
job = None
|
|
|
|
for tmp in all_jobs:
|
|
|
|
if tmp['partition'] == '3':
|
|
|
|
job = tmp
|
|
|
|
break
|
|
|
|
# The candidate nodes to replicate (i.e. dev1 and dev3)
|
|
|
|
# belong to another region
|
|
|
|
self.replicator.update(job)
|
|
|
|
self.assertEquals(fake_func.call_count, 1)
|
|
|
|
self.assertEquals(self.replicator.replication_count, 1)
|
|
|
|
self.assertEquals(self.replicator.suffix_sync, 1)
|
|
|
|
self.assertEquals(self.replicator.suffix_hash, 1)
|
|
|
|
self.assertEquals(self.replicator.suffix_count, 1)
|
|
|
|
|
2012-12-17 06:39:25 -05:00
|
|
|
mock_http.reset_mock()
|
|
|
|
mock_logger.reset_mock()
|
|
|
|
|
2014-03-18 10:50:17 -07:00
|
|
|
# test for replication params on policy 0 only
|
2012-12-17 06:39:25 -05:00
|
|
|
repl_job = local_job.copy()
|
|
|
|
for node in repl_job['nodes']:
|
2013-07-22 15:27:54 -07:00
|
|
|
node['replication_ip'] = '127.0.0.11'
|
|
|
|
node['replication_port'] = '6011'
|
2012-12-17 06:39:25 -05:00
|
|
|
set_default(self)
|
2014-09-18 21:16:35 -07:00
|
|
|
# with only one set of headers, make sure we specify index 0 here
|
2014-03-18 10:50:17 -07:00
|
|
|
# as otherwise it may be different from earlier tests
|
2014-06-23 12:52:50 -07:00
|
|
|
self.headers['X-Backend-Storage-Policy-Index'] = 0
|
2012-12-17 06:39:25 -05:00
|
|
|
self.replicator.update(repl_job)
|
|
|
|
reqs = []
|
|
|
|
for node in repl_job['nodes']:
|
2013-08-31 23:42:43 -04:00
|
|
|
reqs.append(mock.call(node['replication_ip'],
|
|
|
|
node['replication_port'], node['device'],
|
|
|
|
repl_job['partition'], 'REPLICATE',
|
|
|
|
'', headers=self.headers))
|
|
|
|
reqs.append(mock.call(node['replication_ip'],
|
|
|
|
node['replication_port'], node['device'],
|
|
|
|
repl_job['partition'], 'REPLICATE',
|
|
|
|
'/a83', headers=self.headers))
|
2012-12-17 06:39:25 -05:00
|
|
|
mock_http.assert_has_calls(reqs, any_order=True)
|
|
|
|
|
|
|
|
|
2010-07-12 17:03:45 -05:00
|
|
|
if __name__ == '__main__':
|
|
|
|
unittest.main()
|