Merge "Add fallocate_reserve to account and container servers."

This commit is contained in:
Zuul 2018-07-20 08:42:51 +00:00 committed by Gerrit Code Review
commit 89854250c3
10 changed files with 321 additions and 7 deletions

View File

@ -298,10 +298,27 @@ Preventing Disk Full Scenarios
Prevent disk full scenarios by ensuring that the ``proxy-server`` blocks PUT Prevent disk full scenarios by ensuring that the ``proxy-server`` blocks PUT
requests and rsync prevents replication to the specific drives. requests and rsync prevents replication to the specific drives.
You can prevent `proxy-server` PUT requests to low space disks by ensuring You can prevent `proxy-server` PUT requests to low space disks by
``fallocate_reserve`` is set in the ``object-server.conf``. By default, ensuring ``fallocate_reserve`` is set in ``account-server.conf``,
``fallocate_reserve`` is set to 1%. This blocks PUT requests that leave the ``container-server.conf``, and ``object-server.conf``. By default,
free disk space below 1% of the disk. ``fallocate_reserve`` is set to 1%. In the object server, this blocks
PUT requests that would leave the free disk space below 1% of the
disk. In the account and container servers, this blocks operations
that will increase account or container database size once the free
disk space falls below 1%.
Setting ``fallocate_reserve`` is highly recommended to avoid filling
disks to 100%. When Swift's disks are completely full, all requests
involving those disks will fail, including DELETE requests that would
otherwise free up space. This is because object deletion includes the
creation of a zero-byte tombstone (.ts) to record the time of the
deletion for replication purposes; this happens prior to deletion of
the object's data. On a completely-full filesystem, that zero-byte .ts
file cannot be created, so the DELETE request will fail and the disk
will remain completely full. If ``fallocate_reserve`` is set, then the
filesystem will have enough space to create the zero-byte .ts file,
and thus the deletion of the object will succeed and free up some
space.
In order to prevent rsync replication to specific drives, firstly In order to prevent rsync replication to specific drives, firstly
setup ``rsync_module`` per disk in your ``object-replicator``. setup ``rsync_module`` per disk in your ``object-replicator``.

View File

@ -97,6 +97,13 @@ use = egg:swift#account
# Work only with ionice_class. # Work only with ionice_class.
# ionice_class = # ionice_class =
# ionice_priority = # ionice_priority =
#
# You can set fallocate_reserve to the number of bytes or percentage
# of disk space you'd like kept free at all times. If the disk's free
# space falls below this value, then PUT, POST, and REPLICATE requests
# will be denied until the disk has more space available. Percentage
# will be used if the value ends with a '%'.
# fallocate_reserve = 1%
[filter:healthcheck] [filter:healthcheck]
use = egg:swift#healthcheck use = egg:swift#healthcheck

View File

@ -110,6 +110,13 @@ use = egg:swift#container
# Work only with ionice_class. # Work only with ionice_class.
# ionice_class = # ionice_class =
# ionice_priority = # ionice_priority =
#
# You can set fallocate_reserve to the number of bytes or percentage
# of disk space you'd like kept free at all times. If the disk's free
# space falls below this value, then PUT, POST, and REPLICATE requests
# will be denied until the disk has more space available. Percentage
# will be used if the value ends with a '%'.
# fallocate_reserve = 1%
[filter:healthcheck] [filter:healthcheck]
use = egg:swift#healthcheck use = egg:swift#healthcheck

View File

@ -28,7 +28,8 @@ from swift.common.request_helpers import get_param, \
split_and_validate_path split_and_validate_path
from swift.common.utils import get_logger, hash_path, public, \ from swift.common.utils import get_logger, hash_path, public, \
Timestamp, storage_directory, config_true_value, \ Timestamp, storage_directory, config_true_value, \
json, timing_stats, replication, get_log_line json, timing_stats, replication, get_log_line, \
config_fallocate_value, fs_has_free_space
from swift.common.constraints import valid_timestamp, check_utf8, check_drive from swift.common.constraints import valid_timestamp, check_utf8, check_drive
from swift.common import constraints from swift.common import constraints
from swift.common.db_replicator import ReplicatorRpc from swift.common.db_replicator import ReplicatorRpc
@ -60,6 +61,8 @@ class AccountController(BaseStorageServer):
conf.get('auto_create_account_prefix') or '.' conf.get('auto_create_account_prefix') or '.'
swift.common.db.DB_PREALLOCATION = \ swift.common.db.DB_PREALLOCATION = \
config_true_value(conf.get('db_preallocation', 'f')) config_true_value(conf.get('db_preallocation', 'f'))
self.fallocate_reserve, self.fallocate_is_percent = \
config_fallocate_value(conf.get('fallocate_reserve', '1%'))
def _get_account_broker(self, drive, part, account, **kwargs): def _get_account_broker(self, drive, part, account, **kwargs):
hsh = hash_path(account) hsh = hash_path(account)
@ -83,6 +86,11 @@ class AccountController(BaseStorageServer):
pass pass
return resp(request=req, headers=headers, charset='utf-8', body=body) return resp(request=req, headers=headers, charset='utf-8', body=body)
def check_free_space(self, drive):
    """
    Report whether a drive still honours the configured fallocate_reserve.

    The drive's directory under ``self.root`` is handed to
    fs_has_free_space() together with the reserve settings parsed from
    the server configuration.

    :param drive: name of the device directory below ``self.root``
    :returns: whatever fs_has_free_space() returns for that directory
    """
    return fs_has_free_space(
        os.path.join(self.root, drive),
        self.fallocate_reserve,
        self.fallocate_is_percent)
@public @public
@timing_stats() @timing_stats()
def DELETE(self, req): def DELETE(self, req):
@ -108,6 +116,8 @@ class AccountController(BaseStorageServer):
check_drive(self.root, drive, self.mount_check) check_drive(self.root, drive, self.mount_check)
except ValueError: except ValueError:
return HTTPInsufficientStorage(drive=drive, request=req) return HTTPInsufficientStorage(drive=drive, request=req)
if not self.check_free_space(drive):
return HTTPInsufficientStorage(drive=drive, request=req)
if container: # put account container if container: # put account container
if 'x-timestamp' not in req.headers: if 'x-timestamp' not in req.headers:
timestamp = Timestamp.now() timestamp = Timestamp.now()
@ -237,6 +247,8 @@ class AccountController(BaseStorageServer):
check_drive(self.root, drive, self.mount_check) check_drive(self.root, drive, self.mount_check)
except ValueError: except ValueError:
return HTTPInsufficientStorage(drive=drive, request=req) return HTTPInsufficientStorage(drive=drive, request=req)
if not self.check_free_space(drive):
return HTTPInsufficientStorage(drive=drive, request=req)
try: try:
args = json.load(req.environ['wsgi.input']) args = json.load(req.environ['wsgi.input'])
except ValueError as err: except ValueError as err:
@ -255,6 +267,8 @@ class AccountController(BaseStorageServer):
check_drive(self.root, drive, self.mount_check) check_drive(self.root, drive, self.mount_check)
except ValueError: except ValueError:
return HTTPInsufficientStorage(drive=drive, request=req) return HTTPInsufficientStorage(drive=drive, request=req)
if not self.check_free_space(drive):
return HTTPInsufficientStorage(drive=drive, request=req)
broker = self._get_account_broker(drive, part, account) broker = self._get_account_broker(drive, part, account)
if broker.is_deleted(): if broker.is_deleted():
return self._deleted_response(broker, req, HTTPNotFound) return self._deleted_response(broker, req, HTTPNotFound)

View File

@ -918,6 +918,13 @@ class ReplicatorRpc(object):
quarantine_db(broker.db_file, broker.db_type) quarantine_db(broker.db_file, broker.db_type)
return HTTPNotFound() return HTTPNotFound()
raise raise
# TODO(mattoliverau) At this point in the RPC, we have the caller's
# replication info and ours, so it would be cool to be able to make
# an educated guess here on the size of the incoming replication (maybe
# average object table row size * difference in ROWIDs or something)
# and the fallocate_reserve setting so we could return a 507.
# This would make db fallocate_reserve more or less on par with the
# object's.
if remote_info['metadata']: if remote_info['metadata']:
with self.debug_timing('update_metadata'): with self.debug_timing('update_metadata'):
broker.update_metadata(remote_info['metadata']) broker.update_metadata(remote_info['metadata'])

View File

@ -773,6 +773,35 @@ class FileLikeIter(object):
self.closed = True self.closed = True
def fs_has_free_space(fs_path, space_needed, is_percent):
    """
    Check to see whether or not a filesystem has the given amount of space
    free. Unlike fallocate(), this does not reserve any space.

    Free space is measured from ``f_bavail`` (as reported by os.statvfs),
    multiplied by the fragment size ``f_frsize``.

    :param fs_path: path to a file or directory on the filesystem; typically
                    the path to the filesystem's mount point
    :param space_needed: minimum bytes or percentage of free space
    :param is_percent: if True, then space_needed is treated as a percentage
                       of the filesystem's capacity; if False, space_needed
                       is a number of free bytes.
    :returns: True if the filesystem has at least that much free space,
              False otherwise. A filesystem that reports zero total blocks
              only satisfies a percentage requirement of zero or less.
    :raises OSError: if fs_path does not exist
    """
    st = os.statvfs(fs_path)
    free_bytes = st.f_frsize * st.f_bavail
    if not is_percent:
        return free_bytes >= space_needed

    size_bytes = st.f_frsize * st.f_blocks
    if size_bytes <= 0:
        # Some filesystems report zero total blocks; a percentage of
        # nothing cannot be computed, so avoid dividing by zero and treat
        # any positive reserve as unsatisfied.
        return space_needed <= 0
    free_percent = float(free_bytes) / float(size_bytes) * 100
    return free_percent >= space_needed
class FallocateWrapper(object): class FallocateWrapper(object):
def __init__(self, noop=False): def __init__(self, noop=False):

View File

@ -34,8 +34,9 @@ from swift.common.request_helpers import get_param, \
from swift.common.utils import get_logger, hash_path, public, \ from swift.common.utils import get_logger, hash_path, public, \
Timestamp, storage_directory, validate_sync_to, \ Timestamp, storage_directory, validate_sync_to, \
config_true_value, timing_stats, replication, \ config_true_value, timing_stats, replication, \
override_bytes_from_content_type, get_log_line, ShardRange, list_from_csv override_bytes_from_content_type, get_log_line, \
config_fallocate_value, fs_has_free_space, list_from_csv, \
ShardRange
from swift.common.constraints import valid_timestamp, check_utf8, check_drive from swift.common.constraints import valid_timestamp, check_utf8, check_drive
from swift.common import constraints from swift.common import constraints
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
@ -124,6 +125,8 @@ class ContainerController(BaseStorageServer):
self.sync_store = ContainerSyncStore(self.root, self.sync_store = ContainerSyncStore(self.root,
self.logger, self.logger,
self.mount_check) self.mount_check)
self.fallocate_reserve, self.fallocate_is_percent = \
config_fallocate_value(conf.get('fallocate_reserve', '1%'))
def _get_container_broker(self, drive, part, account, container, **kwargs): def _get_container_broker(self, drive, part, account, container, **kwargs):
""" """
@ -298,6 +301,11 @@ class ContainerController(BaseStorageServer):
req.environ['swift.leave_relative_location'] = True req.environ['swift.leave_relative_location'] = True
return HTTPMovedPermanently(headers=headers, request=req) return HTTPMovedPermanently(headers=headers, request=req)
def check_free_space(self, drive):
    """
    Report whether a drive still honours the configured fallocate_reserve.

    The drive's directory under ``self.root`` is handed to
    fs_has_free_space() together with the reserve settings parsed from
    the server configuration.

    :param drive: name of the device directory below ``self.root``
    :returns: whatever fs_has_free_space() returns for that directory
    """
    return fs_has_free_space(
        os.path.join(self.root, drive),
        self.fallocate_reserve,
        self.fallocate_is_percent)
@public @public
@timing_stats() @timing_stats()
def DELETE(self, req): def DELETE(self, req):
@ -438,6 +446,8 @@ class ContainerController(BaseStorageServer):
check_drive(self.root, drive, self.mount_check) check_drive(self.root, drive, self.mount_check)
except ValueError: except ValueError:
return HTTPInsufficientStorage(drive=drive, request=req) return HTTPInsufficientStorage(drive=drive, request=req)
if not self.check_free_space(drive):
return HTTPInsufficientStorage(drive=drive, request=req)
requested_policy_index = self.get_and_validate_policy_index(req) requested_policy_index = self.get_and_validate_policy_index(req)
broker = self._get_container_broker(drive, part, account, container) broker = self._get_container_broker(drive, part, account, container)
if obj: # put container object if obj: # put container object
@ -726,6 +736,8 @@ class ContainerController(BaseStorageServer):
check_drive(self.root, drive, self.mount_check) check_drive(self.root, drive, self.mount_check)
except ValueError: except ValueError:
return HTTPInsufficientStorage(drive=drive, request=req) return HTTPInsufficientStorage(drive=drive, request=req)
if not self.check_free_space(drive):
return HTTPInsufficientStorage(drive=drive, request=req)
try: try:
args = json.load(req.environ['wsgi.input']) args = json.load(req.environ['wsgi.input'])
except ValueError as err: except ValueError as err:
@ -750,6 +762,8 @@ class ContainerController(BaseStorageServer):
check_drive(self.root, drive, self.mount_check) check_drive(self.root, drive, self.mount_check)
except ValueError: except ValueError:
return HTTPInsufficientStorage(drive=drive, request=req) return HTTPInsufficientStorage(drive=drive, request=req)
if not self.check_free_space(drive):
return HTTPInsufficientStorage(drive=drive, request=req)
broker = self._get_container_broker(drive, part, account, container) broker = self._get_container_broker(drive, part, account, container)
if broker.is_deleted(): if broker.is_deleted():
return HTTPNotFound(request=req) return HTTPNotFound(request=req)

View File

@ -16,6 +16,7 @@
import errno import errno
import os import os
import mock import mock
import posix
import unittest import unittest
from tempfile import mkdtemp from tempfile import mkdtemp
from shutil import rmtree from shutil import rmtree
@ -192,6 +193,33 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400) self.assertEqual(resp.status_int, 400)
def test_REPLICATE_insufficient_space(self):
    """REPLICATE gets a 507 when free space is below fallocate_reserve."""
    controller = AccountController({
        'devices': self.testdir,
        'mount_check': 'false',
        'fallocate_reserve': '2%',
    })
    request = Request.blank(
        '/sda1/p/a', environ={'REQUEST_METHOD': 'REPLICATE'})
    fake_statvfs = posix.statvfs_result((
        4096,     # f_bsize
        4096,     # f_frsize
        2854907,  # f_blocks
        59000,    # f_bfree
        57000,    # f_bavail (just under 2% free)
        1280000,  # f_files
        1266040,  # f_ffree,
        1266040,  # f_favail,
        4096,     # f_flag
        255,      # f_namemax
    ))
    expected_path = os.path.join(self.testdir, 'sda1')

    with mock.patch('os.statvfs', return_value=fake_statvfs) as patched:
        response = request.get_response(controller)

    self.assertEqual(response.status_int, 507)
    # the free-space check must look at the device directory exactly once
    self.assertEqual(patched.mock_calls, [mock.call(expected_path)])
def test_REPLICATE_rsync_then_merge_works(self): def test_REPLICATE_rsync_then_merge_works(self):
def fake_rsync_then_merge(self, drive, db_file, args): def fake_rsync_then_merge(self, drive, db_file, args):
return HTTPNoContent() return HTTPNoContent()
@ -379,6 +407,35 @@ class TestAccountController(unittest.TestCase):
self.assertEqual(resp.status_int, 404) self.assertEqual(resp.status_int, 404)
self.assertNotIn('X-Account-Status', resp.headers) self.assertNotIn('X-Account-Status', resp.headers)
def test_PUT_insufficient_space(self):
    """PUT gets a 507 when free space is below fallocate_reserve."""
    controller = AccountController({
        'devices': self.testdir,
        'mount_check': 'false',
        'fallocate_reserve': '2%',
    })
    request = Request.blank(
        '/sda1/p/a',
        environ={'REQUEST_METHOD': 'PUT'},
        headers={'X-Timestamp': '1517612949.541469'})
    fake_statvfs = posix.statvfs_result((
        4096,     # f_bsize
        4096,     # f_frsize
        2854907,  # f_blocks
        59000,    # f_bfree
        57000,    # f_bavail (just under 2% free)
        1280000,  # f_files
        1266040,  # f_ffree,
        1266040,  # f_favail,
        4096,     # f_flag
        255,      # f_namemax
    ))
    expected_path = os.path.join(self.testdir, 'sda1')

    with mock.patch('os.statvfs', return_value=fake_statvfs) as patched:
        response = request.get_response(controller)

    self.assertEqual(response.status_int, 507)
    # the free-space check must look at the device directory exactly once
    self.assertEqual(patched.mock_calls, [mock.call(expected_path)])
def test_PUT(self): def test_PUT(self):
req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT', req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'}) 'HTTP_X_TIMESTAMP': '0'})
@ -701,6 +758,35 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400) self.assertEqual(resp.status_int, 400)
def test_POST_insufficient_space(self):
    """POST gets a 507 when free space is below fallocate_reserve."""
    controller = AccountController({
        'devices': self.testdir,
        'mount_check': 'false',
        'fallocate_reserve': '2%',
    })
    request = Request.blank(
        '/sda1/p/a',
        environ={'REQUEST_METHOD': 'POST'},
        headers={'X-Timestamp': '1517611584.937603'})
    fake_statvfs = posix.statvfs_result((
        4096,     # f_bsize
        4096,     # f_frsize
        2854907,  # f_blocks
        59000,    # f_bfree
        57000,    # f_bavail (just under 2% free)
        1280000,  # f_files
        1266040,  # f_ffree,
        1266040,  # f_favail,
        4096,     # f_flag
        255,      # f_namemax
    ))
    expected_path = os.path.join(self.testdir, 'sda1')

    with mock.patch('os.statvfs', return_value=fake_statvfs) as patched:
        response = request.get_response(controller)

    self.assertEqual(response.status_int, 507)
    # the free-space check must look at the device directory exactly once
    self.assertEqual(patched.mock_calls, [mock.call(expected_path)])
def test_POST_timestamp_not_float(self): def test_POST_timestamp_not_float(self):
req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'POST', req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'POST',
'HTTP_X_TIMESTAMP': '0'}, 'HTTP_X_TIMESTAMP': '0'},

View File

@ -34,6 +34,7 @@ import logging
import platform import platform
import os import os
import mock import mock
import posix
import pwd import pwd
import random import random
import re import re
@ -6690,6 +6691,51 @@ class TestHashForFileFunction(unittest.TestCase):
'\n'.join(failures)) '\n'.join(failures))
class TestFsHasFreeSpace(unittest.TestCase):
    """Exercise utils.fs_has_free_space() against a canned statvfs result."""

    def _fake_statvfs(self):
        # Build the statvfs result shared by both tests.
        return posix.statvfs_result([
            4096,     # f_bsize
            4096,     # f_frsize
            2854907,  # f_blocks
            1984802,  # f_bfree (free blocks for root)
            1728089,  # f_bavail (free blocks for non-root)
            1280000,  # f_files
            1266040,  # f_ffree,
            1266040,  # f_favail,
            4096,     # f_flag
            255,      # f_namemax
        ])

    def test_bytes(self):
        # free space left = f_bavail * f_frsize = 7078252544
        enough = (0, 1, 7078252544)
        too_much = (7078252545, 2 ** 64)
        with mock.patch('os.statvfs', return_value=self._fake_statvfs()):
            for needed in enough:
                self.assertTrue(utils.fs_has_free_space("/", needed, False))
            for needed in too_much:
                self.assertFalse(utils.fs_has_free_space("/", needed, False))

    def test_percent(self):
        # percentage of free space for the faked statvfs is 60%
        enough = (0, 1, 60)
        too_much = (61, 100, 110)
        with mock.patch('os.statvfs', return_value=self._fake_statvfs()):
            for needed in enough:
                self.assertTrue(utils.fs_has_free_space("/", needed, True))
            for needed in too_much:
                self.assertFalse(utils.fs_has_free_space("/", needed, True))
class TestSetSwiftDir(unittest.TestCase): class TestSetSwiftDir(unittest.TestCase):
def setUp(self): def setUp(self):
self.swift_dir = tempfile.mkdtemp() self.swift_dir = tempfile.mkdtemp()

View File

@ -16,6 +16,7 @@
import operator import operator
import os import os
import posix
import mock import mock
import unittest import unittest
import itertools import itertools
@ -426,6 +427,35 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 202) self.assertEqual(resp.status_int, 202)
def test_PUT_insufficient_space(self):
    """PUT gets a 507 when free space is below fallocate_reserve."""
    controller = container_server.ContainerController({
        'devices': self.testdir,
        'mount_check': 'false',
        'fallocate_reserve': '2%',
    })
    request = Request.blank(
        '/sda1/p/a/c',
        environ={'REQUEST_METHOD': 'PUT'},
        headers={'X-Timestamp': '1517617825.74832'})
    fake_statvfs = posix.statvfs_result((
        4096,     # f_bsize
        4096,     # f_frsize
        2854907,  # f_blocks
        59000,    # f_bfree
        57000,    # f_bavail (just under 2% free)
        1280000,  # f_files
        1266040,  # f_ffree,
        1266040,  # f_favail,
        4096,     # f_flag
        255,      # f_namemax
    ))
    expected_path = os.path.join(self.testdir, 'sda1')

    with mock.patch('os.statvfs', return_value=fake_statvfs) as patched:
        response = request.get_response(controller)

    self.assertEqual(response.status_int, 507)
    # the free-space check must look at the device directory exactly once
    self.assertEqual(patched.mock_calls, [mock.call(expected_path)])
def test_PUT_simulated_create_race(self): def test_PUT_simulated_create_race(self):
state = ['initial'] state = ['initial']
@ -1001,6 +1031,35 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 400) self.assertEqual(resp.status_int, 400)
def test_POST_insufficient_space(self):
    """POST gets a 507 when free space is below fallocate_reserve."""
    controller = container_server.ContainerController({
        'devices': self.testdir,
        'mount_check': 'false',
        'fallocate_reserve': '2%',
    })
    request = Request.blank(
        '/sda1/p/a/c',
        environ={'REQUEST_METHOD': 'POST'},
        headers={'X-Timestamp': '1517618035.469202'})
    fake_statvfs = posix.statvfs_result((
        4096,     # f_bsize
        4096,     # f_frsize
        2854907,  # f_blocks
        59000,    # f_bfree
        57000,    # f_bavail (just under 2% free)
        1280000,  # f_files
        1266040,  # f_ffree,
        1266040,  # f_favail,
        4096,     # f_flag
        255,      # f_namemax
    ))
    expected_path = os.path.join(self.testdir, 'sda1')

    with mock.patch('os.statvfs', return_value=fake_statvfs) as patched:
        response = request.get_response(controller)

    self.assertEqual(response.status_int, 507)
    # the free-space check must look at the device directory exactly once
    self.assertEqual(patched.mock_calls, [mock.call(expected_path)])
def test_POST_timestamp_not_float(self): def test_POST_timestamp_not_float(self):
req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'}) 'HTTP_X_TIMESTAMP': '0'})
@ -1388,6 +1447,34 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 500) self.assertEqual(resp.status_int, 500)
def test_REPLICATE_insufficient_space(self):
    """REPLICATE gets a 507 when free space is below fallocate_reserve."""
    controller = container_server.ContainerController({
        'devices': self.testdir,
        'mount_check': 'false',
        'fallocate_reserve': '2%',
    })
    request = Request.blank(
        '/sda1/p/a/', environ={'REQUEST_METHOD': 'REPLICATE'})
    fake_statvfs = posix.statvfs_result((
        4096,     # f_bsize
        4096,     # f_frsize
        2854907,  # f_blocks
        59000,    # f_bfree
        57000,    # f_bavail (just under 2% free)
        1280000,  # f_files
        1266040,  # f_ffree,
        1266040,  # f_favail,
        4096,     # f_flag
        255,      # f_namemax
    ))
    expected_path = os.path.join(self.testdir, 'sda1')

    with mock.patch('os.statvfs', return_value=fake_statvfs) as patched:
        response = request.get_response(controller)

    self.assertEqual(response.status_int, 507)
    # the free-space check must look at the device directory exactly once
    self.assertEqual(patched.mock_calls, [mock.call(expected_path)])
def test_DELETE(self): def test_DELETE(self):
ts_iter = make_timestamp_iter() ts_iter = make_timestamp_iter()
req = Request.blank( req = Request.blank(