Files
swift/swift/obj/ssync_sender.py
gholt a80c720af5 Object replication ssync (an rsync alternative)
For this commit, ssync is just a direct replacement for how
we use rsync. Assuming we switch over to ssync completely
someday and drop rsync, we will then be able to improve the
algorithms even further (removing local objects as we
successfully transfer each one rather than waiting for whole
partitions, using an index.db with hash-trees, etc., etc.)

For easier review, this commit can be thought of in distinct
parts:

1)  New global_conf_callback functionality for allowing
    services to perform setup code before workers, etc. are
    launched. (This is then used by ssync in the object
    server to create a cross-worker semaphore to restrict
    concurrent incoming replication.)

2)  A bit of shifting of items up from object server and
    replicator to diskfile or DEFAULT conf sections for
    better sharing of the same settings. conn_timeout,
    node_timeout, client_timeout, network_chunk_size,
    disk_chunk_size.

3)  Modifications to the object server and replicator to
    optionally use ssync in place of rsync. This is done in
    a generic enough way that switching to FutureSync should
    be easy someday.

4)  The biggest part, and (at least for now) completely
    optional part, are the new ssync_sender and
    ssync_receiver files. Nice and isolated for easier
    testing and visibility into test coverage, etc.

All the usual logging, statsd, recon, etc. instrumentation
is still there when using ssync, just as it is when using
rsync.

Beyond the essential error and exceptional condition
logging, I have not added any additional instrumentation at
this time. Unless there is something someone finds super
pressing to have added to the logging, I think such
additions would be better as separate change reviews.

FOR NOW, IT IS NOT RECOMMENDED TO USE SSYNC ON PRODUCTION
CLUSTERS. Some of us will be in a limited fashion to look
for any subtle issues, tuning, etc. but generally ssync is
an experimental feature. In its current implementation it is
probably going to be a bit slower than rsync, but if all
goes according to plan it will end up much faster.

There are no comparisions yet between ssync and rsync other
than some raw virtual machine testing I've done to show it
should compete well enough once we can put it in use in the
real world.

If you Tweet, Google+, or whatever, be sure to indicate it's
experimental. It'd be best to keep it out of deployment
guides, howtos, etc. until we all figure out if we like it,
find it to be stable, etc.

Change-Id: If003dcc6f4109e2d2a42f4873a0779110fff16d6
2013-11-07 16:52:01 +00:00

310 lines
13 KiB
Python

# Copyright (c) 2013 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import with_statement
import urllib
import eventlet
import eventlet.wsgi
import eventlet.greenio
from swift.common import bufferedhttp
from swift.common import exceptions
from swift.common import http
class Sender(object):
"""
Sends REPLICATION requests to the object server.
These requests are eventually handled by
:py:mod:`.ssync_receiver` and full documentation about the
process is there.
"""
def __init__(self, daemon, node, job, suffixes):
self.daemon = daemon
self.node = node
self.job = job
self.suffixes = suffixes
self.connection = None
self.response = None
self.response_buffer = ''
self.response_chunk_left = 0
self.send_list = None
self.failures = 0
def __call__(self):
if not self.suffixes:
return True
try:
# Double try blocks in case our main error handler fails.
try:
# The general theme for these functions is that they should
# raise exceptions.MessageTimeout for client timeouts and
# exceptions.ReplicationException for common issues that will
# abort the replication attempt and log a simple error. All
# other exceptions will be logged with a full stack trace.
self.connect()
self.missing_check()
self.updates()
self.disconnect()
return self.failures == 0
except (exceptions.MessageTimeout,
exceptions.ReplicationException) as err:
self.daemon.logger.error(
'%s:%s/%s/%s %s', self.node.get('ip'),
self.node.get('port'), self.node.get('device'),
self.job.get('partition'), err)
except Exception:
# We don't want any exceptions to escape our code and possibly
# mess up the original replicator code that called us since it
# was originally written to shell out to rsync which would do
# no such thing.
self.daemon.logger.exception(
'%s:%s/%s/%s EXCEPTION in replication.Sender',
self.node.get('ip'), self.node.get('port'),
self.node.get('device'), self.job.get('partition'))
except Exception:
# We don't want any exceptions to escape our code and possibly
# mess up the original replicator code that called us since it
# was originally written to shell out to rsync which would do
# no such thing.
# This particular exception handler does the minimal amount as it
# would only get called if the above except Exception handler
# failed (bad node or job data).
self.daemon.logger.exception('EXCEPTION in replication.Sender')
return False
def connect(self):
"""
Establishes a connection and starts a REPLICATION request
with the object server.
"""
with exceptions.MessageTimeout(
self.daemon.conn_timeout, 'connect send'):
self.connection = bufferedhttp.BufferedHTTPConnection(
'%s:%s' % (self.node['ip'], self.node['port']))
self.connection.putrequest('REPLICATION', '/%s/%s' % (
self.node['device'], self.job['partition']))
self.connection.putheader('Transfer-Encoding', 'chunked')
self.connection.endheaders()
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'connect receive'):
self.response = self.connection.getresponse()
if self.response.status != http.HTTP_OK:
raise exceptions.ReplicationException(
'Expected status %s; got %s' %
(http.HTTP_OK, self.response.status))
def readline(self):
"""
Reads a line from the REPLICATION response body.
httplib has no readline and will block on read(x) until x is
read, so we have to do the work ourselves. A bit of this is
taken from Python's httplib itself.
"""
data = self.response_buffer
self.response_buffer = ''
while '\n' not in data and len(data) < self.daemon.network_chunk_size:
if self.response_chunk_left == -1: # EOF-already indicator
break
if self.response_chunk_left == 0:
line = self.response.fp.readline()
i = line.find(';')
if i >= 0:
line = line[:i] # strip chunk-extensions
try:
self.response_chunk_left = int(line.strip(), 16)
except ValueError:
# close the connection as protocol synchronisation is
# probably lost
self.response.close()
raise exceptions.ReplicationException('Early disconnect')
if self.response_chunk_left == 0:
self.response_chunk_left = -1
break
chunk = self.response.fp.read(min(
self.response_chunk_left,
self.daemon.network_chunk_size - len(data)))
if not chunk:
# close the connection as protocol synchronisation is
# probably lost
self.response.close()
raise exceptions.ReplicationException('Early disconnect')
self.response_chunk_left -= len(chunk)
if self.response_chunk_left == 0:
self.response.fp.read(2) # discard the trailing \r\n
data += chunk
if '\n' in data:
data, self.response_buffer = data.split('\n', 1)
data += '\n'
return data
def missing_check(self):
"""
Handles the sender-side of the MISSING_CHECK step of a
REPLICATION request.
Full documentation of this can be found at
:py:meth:`.Receiver.missing_check`.
"""
# First, send our list.
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'missing_check start'):
msg = ':MISSING_CHECK: START\r\n'
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
for path, object_hash, timestamp in \
self.daemon._diskfile_mgr.yield_hashes(
self.job['device'], self.job['partition'], self.suffixes):
with exceptions.MessageTimeout(
self.daemon.node_timeout,
'missing_check send line'):
msg = '%s %s\r\n' % (
urllib.quote(object_hash),
urllib.quote(timestamp))
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'missing_check end'):
msg = ':MISSING_CHECK: END\r\n'
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
# Now, retrieve the list of what they want.
while True:
with exceptions.MessageTimeout(
self.daemon.http_timeout, 'missing_check start wait'):
line = self.readline()
if not line:
raise exceptions.ReplicationException('Early disconnect')
line = line.strip()
if line == ':MISSING_CHECK: START':
break
elif line:
raise exceptions.ReplicationException(
'Unexpected response: %r' % line[:1024])
self.send_list = []
while True:
with exceptions.MessageTimeout(
self.daemon.http_timeout, 'missing_check line wait'):
line = self.readline()
if not line:
raise exceptions.ReplicationException('Early disconnect')
line = line.strip()
if line == ':MISSING_CHECK: END':
break
if line:
self.send_list.append(line)
def updates(self):
"""
Handles the sender-side of the UPDATES step of a REPLICATION
request.
Full documentation of this can be found at
:py:meth:`.Receiver.updates`.
"""
# First, send all our subrequests based on the send_list.
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'updates start'):
msg = ':UPDATES: START\r\n'
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
for object_hash in self.send_list:
try:
df = self.daemon._diskfile_mgr.get_diskfile_from_hash(
self.job['device'], self.job['partition'], object_hash)
except exceptions.DiskFileNotExist:
continue
url_path = urllib.quote(
'/%s/%s/%s' % (df.account, df.container, df.obj))
try:
df.open()
except exceptions.DiskFileDeleted as err:
self.send_delete(url_path, err.timestamp)
except exceptions.DiskFileError:
pass
else:
self.send_put(url_path, df)
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'updates end'):
msg = ':UPDATES: END\r\n'
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
# Now, read their response for any issues.
while True:
with exceptions.MessageTimeout(
self.daemon.http_timeout, 'updates start wait'):
line = self.readline()
if not line:
raise exceptions.ReplicationException('Early disconnect')
line = line.strip()
if line == ':UPDATES: START':
break
elif line:
raise exceptions.ReplicationException(
'Unexpected response: %r' % line[:1024])
while True:
with exceptions.MessageTimeout(
self.daemon.http_timeout, 'updates line wait'):
line = self.readline()
if not line:
raise exceptions.ReplicationException('Early disconnect')
line = line.strip()
if line == ':UPDATES: END':
break
elif line:
raise exceptions.ReplicationException(
'Unexpected response: %r' % line[:1024])
def send_delete(self, url_path, timestamp):
"""
Sends a DELETE subrequest with the given information.
"""
msg = ['DELETE ' + url_path, 'X-Timestamp: ' + timestamp]
msg = '\r\n'.join(msg) + '\r\n\r\n'
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'send_delete'):
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
def send_put(self, url_path, df):
"""
Sends a PUT subrequest for the url_path using the source df
(DiskFile) and content_length.
"""
msg = ['PUT ' + url_path, 'Content-Length: ' + str(df.content_length)]
# Sorted to make it easier to test.
for key, value in sorted(df.get_metadata().iteritems()):
if key not in ('name', 'Content-Length'):
msg.append('%s: %s' % (key, value))
msg = '\r\n'.join(msg) + '\r\n\r\n'
with exceptions.MessageTimeout(self.daemon.node_timeout, 'send_put'):
self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
for chunk in df.reader(iter_hook=eventlet.sleep):
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'send_put chunk'):
self.connection.send('%x\r\n%s\r\n' % (len(chunk), chunk))
def disconnect(self):
"""
Closes down the connection to the object server once done
with the REPLICATION request.
"""
try:
with exceptions.MessageTimeout(
self.daemon.node_timeout, 'disconnect'):
self.connection.send('0\r\n\r\n')
except (Exception, exceptions.Timeout):
pass # We're okay with the above failing.
self.connection.close()