diff --git a/bin/swift-container-sharder b/bin/swift-container-sharder new file mode 100755 index 0000000000..3e6551319b --- /dev/null +++ b/bin/swift-container-sharder @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) 2010-2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.container.sharder import ContainerSharder +from swift.common.utils import parse_options +from swift.common.daemon import run_daemon +from optparse import OptionParser + +if __name__ == '__main__': + parser = OptionParser("%prog CONFIG [options]") + parser.add_option('-d', '--devices', + help='Shard containers only on given devices. ' + 'Comma-separated list. ' + 'Only has effect if --once is used.') + parser.add_option('-p', '--partitions', + help='Shard containers only in given partitions. ' + 'Comma-separated list. ' + 'Only has effect if --once is used.') + conf_file, options = parse_options(parser=parser, once=True) + run_daemon(ContainerSharder, conf_file, **options) diff --git a/doc/saio/swift/container-server/1.conf b/doc/saio/swift/container-server/1.conf index 5bf3c0f28c..e71a5b6683 100644 --- a/doc/saio/swift/container-server/1.conf +++ b/doc/saio/swift/container-server/1.conf @@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port} [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff --git a/doc/saio/swift/container-server/2.conf b/doc/saio/swift/container-server/2.conf index 0b29ada029..86e58a9fde 100644 --- a/doc/saio/swift/container-server/2.conf +++ b/doc/saio/swift/container-server/2.conf @@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port} [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff --git a/doc/saio/swift/container-server/3.conf b/doc/saio/swift/container-server/3.conf index 9f340d07e6..73e760af15 100644 --- a/doc/saio/swift/container-server/3.conf +++ b/doc/saio/swift/container-server/3.conf @@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port} [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make 
explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff --git a/doc/saio/swift/container-server/4.conf b/doc/saio/swift/container-server/4.conf index 5e95e9c57c..c254191b8f 100644 --- a/doc/saio/swift/container-server/4.conf +++ b/doc/saio/swift/container-server/4.conf @@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port} [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff --git a/doc/saio/swift/internal-client.conf b/doc/saio/swift/internal-client.conf new file mode 100644 index 0000000000..052d1e7549 --- /dev/null +++ b/doc/saio/swift/internal-client.conf @@ -0,0 +1,24 @@ +[DEFAULT] + +[pipeline:main] +pipeline = catch_errors proxy-logging cache symlink proxy-server + +[app:proxy-server] +use = egg:swift#proxy +account_autocreate = true +# See proxy-server.conf-sample for options + +[filter:symlink] +use = egg:swift#symlink +# See proxy-server.conf-sample for options + +[filter:cache] +use = egg:swift#memcache +# See proxy-server.conf-sample for options + +[filter:proxy-logging] +use = egg:swift#proxy_logging + +[filter:catch_errors] +use = egg:swift#catch_errors +# See proxy-server.conf-sample for options diff --git a/doc/source/container.rst b/doc/source/container.rst index dcff33e3aa..bc95753852 100644 --- a/doc/source/container.rst +++ b/doc/source/container.rst @@ -24,6 +24,16 @@ Container Backend :undoc-members: :show-inheritance: +.. _container-replicator: + +Container Replicator +==================== + +.. automodule:: swift.container.replicator + :members: + :undoc-members: + :show-inheritance: + .. _container-server: Container Server @@ -44,12 +54,12 @@ Container Reconciler :undoc-members: :show-inheritance: -.. _container-replicator: +.. _container-sharder: -Container Replicator -==================== +Container Sharder +================= -.. automodule:: swift.container.replicator +.. 
automodule:: swift.container.sharder :members: :undoc-members: :show-inheritance:
diff --git a/doc/source/images/sharded_GET.svg b/doc/source/images/sharded_GET.svg new file mode 100644 index 0000000000..03c271b5cc --- /dev/null +++ b/doc/source/images/sharded_GET.svg @@ -0,0 +1,2019 @@ [SVG markup omitted: diagram of a GET to a sharded container showing the proxy, the root container's fresh db in /acct, and shard containers cont-568d8e-<ts>-0 through cont-4837ad-<ts>-4 in /.shards_acct covering ""-"cat", "cat"-"giraffe", "giraffe"-"igloo", "igloo"-"linux" and "linux"-""]
diff --git a/doc/source/images/sharding_GET.svg b/doc/source/images/sharding_GET.svg new file mode 100644 index 0000000000..5e9240feeb --- /dev/null +++ b/doc/source/images/sharding_GET.svg @@ -0,0 +1,2112 @@ [SVG markup omitted: diagram of a GET while sharding is in progress, showing the proxy, the root container's fresh and retiring dbs in /acct, and shard containers in /.shards_acct]
diff --git a/doc/source/images/sharding_cleave1_load.svg b/doc/source/images/sharding_cleave1_load.svg new file mode 100644 index 0000000000..4485e3ea09 --- /dev/null +++ b/doc/source/images/sharding_cleave1_load.svg @@ -0,0 +1,1694 @@ [SVG markup omitted: load diagram after the first cleaving batch, with shard containers covering ""-"cat", "cat"-"giraffe" and "giraffe"-"igloo" and the root handling "igloo"-""]
diff --git a/doc/source/images/sharding_cleave2_load.svg b/doc/source/images/sharding_cleave2_load.svg new file mode 100644 index 0000000000..548aab56ab --- /dev/null +++ b/doc/source/images/sharding_cleave2_load.svg @@ -0,0 +1,1754 @@ [SVG markup omitted: load diagram after the second cleaving batch, with shard ranges up to "linux" cleaved and the root handling "linux"-""]
diff --git a/doc/source/images/sharding_cleave_basic.svg b/doc/source/images/sharding_cleave_basic.svg new file mode 100644 index 0000000000..fd5069754f --- /dev/null +++ b/doc/source/images/sharding_cleave_basic.svg @@ -0,0 +1,649 @@ [SVG markup omitted: basic cleaving diagram showing the root container cont in /acct and shard containers cont-568d8e-<ts>-0 and cont-750ed3-<ts>-1 in /.shards_acct]
diff --git a/doc/source/images/sharding_db_states.svg b/doc/source/images/sharding_db_states.svg new file mode 100644 index 0000000000..6693ef9b3a --- /dev/null +++ b/doc/source/images/sharding_db_states.svg @@ -0,0 +1,1502 @@ [SVG markup omitted: state diagram of the container DB files through the UNSHARDED, SHARDING and SHARDED states, showing the container DB, retiring DB and fresh DB]
diff --git a/doc/source/images/sharding_scan_basic.svg b/doc/source/images/sharding_scan_basic.svg new file mode 100644 index 0000000000..54c30f0d8d --- /dev/null +++ b/doc/source/images/sharding_scan_basic.svg @@ -0,0 +1,259 @@ [SVG markup omitted: basic scan diagram showing container cont in /acct with upper bounds "cat" and "giraffe"]
diff --git a/doc/source/images/sharding_scan_load.svg b/doc/source/images/sharding_scan_load.svg new file mode 100644 index 0000000000..327ac1a06c --- /dev/null +++ b/doc/source/images/sharding_scan_load.svg @@ -0,0 +1,1665 @@ [SVG markup omitted: load diagram during scanning, showing the root container's fresh and retiring dbs in /acct and shard containers in /.shards_acct covering ""-"cat", "cat"-"giraffe" and "giraffe"-"igloo", with the root handling "igloo"-""]
diff --git a/doc/source/images/sharding_sharded_load.svg b/doc/source/images/sharding_sharded_load.svg new file mode 100644 index 0000000000..ae9aacb86c --- /dev/null +++ b/doc/source/images/sharding_sharded_load.svg @@ -0,0 +1,1650 @@ [SVG markup omitted: load diagram for a fully sharded container, with the root's fresh db in /acct and five shard containers in /.shards_acct covering the whole namespace]
diff --git a/doc/source/images/sharding_unsharded.svg b/doc/source/images/sharding_unsharded.svg new file mode 100644 index 0000000000..4241b0de13 --- /dev/null +++ b/doc/source/images/sharding_unsharded.svg @@ -0,0 +1,199 @@ [SVG markup omitted: diagram of an unsharded container cont in /acct]
diff --git a/doc/source/images/sharding_unsharded_load.svg b/doc/source/images/sharding_unsharded_load.svg new file mode 100644 index 0000000000..e613e8cbbd --- /dev/null +++ b/doc/source/images/sharding_unsharded_load.svg @@ -0,0 +1,219 @@ [SVG markup omitted: load diagram for an unsharded container cont in /acct]
diff --git a/doc/source/index.rst b/doc/source/index.rst index 63df790815..b72925c6dd 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -62,6 +62,7 @@ Overview and Concepts overview_erasure_code overview_encryption overview_backing_store + overview_container_sharding ring_background ring_partpower associated_projects
diff --git a/doc/source/logs.rst b/doc/source/logs.rst index f9a8ba2c62..1a5d2656c2 100644 --- a/doc/source/logs.rst +++ b/doc/source/logs.rst @@ -105,6 +105,7 @@ RL :ref:`ratelimit` VW :ref:`versioned_writes` SSC :ref:`copy` SYM :ref:`symlink` +SH :ref:`sharding_doc` ======================= =============================
diff --git a/doc/source/overview_architecture.rst b/doc/source/overview_architecture.rst index 30b26a471f..b0ae293d9a 100644 --- a/doc/source/overview_architecture.rst +++ b/doc/source/overview_architecture.rst @@ -172,6 +172,8 @@ replicator for Replication type policies. See :doc:`overview_erasure_code` for complete information on both Erasure Code support as well as the reconstructor. +.. _architecture_updaters: + -------- Updaters --------
diff --git a/doc/source/overview_container_sharding.rst b/doc/source/overview_container_sharding.rst new file mode 100644 index 0000000000..110fcc8f87 --- /dev/null +++ b/doc/source/overview_container_sharding.rst @@ -0,0 +1,784 @@ +..
_sharding_doc: + +================== +Container Sharding +================== + +Container sharding is an operator-controlled feature that may be used to shard +very large container databases into a number of smaller shard containers. + +.. note:: + + Container sharding is currently an experimental feature. It is strongly + recommended that operators gain experience of sharding containers in a + non-production cluster before using it in production. + + The sharding process involves moving all sharding container database + records via the container replication engine; the time taken to complete + sharding is dependent upon the existing cluster load and the performance of + the container database being sharded. + + There is currently no documented process for reversing the sharding + process once sharding has been enabled. + + +---------- +Background +---------- +The metadata for each container in Swift is stored in an SQLite database. This +metadata includes: information about the container such as its name, +modification time and current object count; user metadata that may have been written +to the container by clients; a record of every object in the container. The +container database object records are used to generate container listings in +response to container GET requests; each object record stores the object's +name, size, hash and content-type as well as associated timestamps. + +As the number of objects in a container increases, so does the number of object +records in the container database. Eventually the container database +performance starts to degrade and the time taken to update an object record +increases. This can result in object updates timing out, with a corresponding +increase in the backlog of pending :ref:`asynchronous updates +<architecture_updaters>` on object servers. Container databases are typically +replicated on several nodes and any database performance degradation can also +result in longer :doc:`container replication ` times. + +The point at which container database performance starts to degrade depends +upon the choice of hardware in the container ring. Anecdotal evidence suggests +that containers with tens of millions of object records have noticeably +degraded performance. + +This performance degradation can be avoided by ensuring that clients use an +object naming scheme that disperses objects across a number of containers, +thereby distributing load across a number of container databases. However, that +is not always desirable, nor is it always under the control of the cluster operator. + +Swift's container sharding feature provides the operator with a mechanism to +distribute the load on a single client-visible container across multiple, +hidden, shard containers, each of which stores a subset of the container's +object records. Clients are unaware of container sharding; clients continue to +use the same API to access a container that, if sharded, maps to a number of +shard containers within the Swift cluster. + +------------------------ +Deployment and operation +------------------------ + +Upgrade Considerations +---------------------- + +It is essential that all servers in a Swift cluster have been upgraded to +support the container sharding feature before attempting to shard a container. + +Identifying containers in need of sharding +------------------------------------------ + +Container sharding is currently initiated by the ``swift-manage-shard-ranges`` +CLI tool :ref:`described below <swift-manage-shard-ranges>`. Operators must +first identify containers that are candidates for sharding.
To assist with +this, the :ref:`sharder_daemon` inspects the size of containers that it visits +and writes a list of sharding candidates to recon cache. For example:: + + "sharding_candidates": { + "found": 1, + "top": [ + { + "account": "AUTH_test", + "container": "c1", + "file_size": 497763328, + "meta_timestamp": "1525346445.31161", + "node_index": 2, + "object_count": 3349028, + "path": , + "root": "AUTH_test/c1" + } + ] + } + +A container is considered to be a sharding candidate if its object count is +greater than or equal to the ``shard_container_threshold`` option. +The number of candidates reported is limited to a number configured by the +``recon_candidates_limit`` option such that only the largest candidate +containers are included in the ``sharding_candidate`` data. + + +.. _swift-manage-shard-ranges: + +``swift-manage-shard-ranges`` CLI tool +-------------------------------------- + +The ``swift-manage-shard-ranges`` tool provides commands for initiating +sharding of a container. ``swift-manage-shard-ranges`` operates directly on a +container database file. + +.. note:: + + ``swift-manage-shard-ranges`` must only be used on one replica of a + container database to avoid inconsistent results. The modifications made by + ``swift-manage-shard-ranges`` will be automatically copied to other + replicas of the container database via normal replication processes. + +There are three steps in the process of initiating sharding, each of which may +be performed in isolation or, as shown below, using a single command. + +#. The ``find`` sub-command scans the container database to identify how many + shard containers will be required and which objects they will manage. Each + shard container manages a range of the object namespace defined by a + ``lower`` and ``upper`` bound. The maximum number of objects to be allocated + to each shard container is specified on the command line. For example:: + + $ swift-manage-shard-ranges find 500000 + Loaded db broker for AUTH_test/c1. + [ + { + "index": 0, + "lower": "", + "object_count": 500000, + "upper": "o_01086834" + }, + { + "index": 1, + "lower": "o_01086834", + "object_count": 500000, + "upper": "o_01586834" + }, + { + "index": 2, + "lower": "o_01586834", + "object_count": 500000, + "upper": "o_02087570" + }, + { + "index": 3, + "lower": "o_02087570", + "object_count": 500000, + "upper": "o_02587572" + }, + { + "index": 4, + "lower": "o_02587572", + "object_count": 500000, + "upper": "o_03087572" + }, + { + "index": 5, + "lower": "o_03087572", + "object_count": 500000, + "upper": "o_03587572" + }, + { + "index": 6, + "lower": "o_03587572", + "object_count": 349194, + "upper": "" + } + ] + Found 7 ranges in 4.37222s (total object count 3349194) + + This command returns a list of shard ranges each of which describes the + namespace to be managed by a shard container. No other action is taken by + this command and the container database is unchanged. The output may be + redirected to a file for subsequent retrieval by the ``replace`` command. + For example:: + + $ swift-manage-shard-ranges find 500000 > my_shard_ranges + Loaded db broker for AUTH_test/c1. + Found 7 ranges in 2.448s (total object count 3349194) + +#. The ``replace`` sub-command deletes any shard ranges that might already be + in the container database and inserts shard ranges from a given file. The + file contents should be in the format generated by the ``find`` sub-command. 
+ For example:: + + $ swift-manage-shard-ranges replace my_shard_ranges + Loaded db broker for AUTH_test/c1. + No shard ranges found to delete. + Injected 7 shard ranges. + Run container-replicator to replicate them to other nodes. + Use the enable sub-command to enable sharding. + + The container database is modified to store the shard ranges, but the + container will not start sharding until sharding is enabled. The ``info`` + sub-command may be used to inspect the state of the container database at + any point, and the ``show`` sub-command may be used to display the inserted + shard ranges. + + Shard ranges stored in the container database may be replaced using the + ``replace`` sub-command. This will first delete all existing shard ranges + before storing new shard ranges. Shard ranges may also be deleted from the + container database using the ``delete`` sub-command. + + Shard ranges should not be replaced or deleted using + ``swift-manage-shard-ranges`` once the next step of enabling sharding has + been taken. + +#. The ``enable`` sub-command enables the container for sharding. The sharder + daemon and/or container replicator daemon will replicate shard ranges to + other replicas of the container db and the sharder daemon will proceed to + shard the container. This process may take some time depending on the size + of the container, the number of shard ranges and the underlying hardware. + +.. note:: + + Once the ``enable`` sub-command has been used there is no supported + mechanism to revert sharding. Do not use ``swift-manage-shard-ranges`` to + make any further changes to the shard ranges in the container db. + + For example:: + + $ swift-manage-shard-ranges enable + Loaded db broker for AUTH_test/c1. + Container moved to state 'sharding' with epoch 1525345093.22908. + Run container-sharder on all nodes to shard the container. + + This does not shard the container - sharding is performed by the + :ref:`sharder_daemon` - but sets the necessary state in the database for the + daemon to subsequently start the sharding process. + + The ``epoch`` value displayed in the output is the time at which sharding + was enabled. When the :ref:`sharder_daemon` starts sharding this container + it creates a new container database file using the epoch in the filename to + distinguish it from the retiring DB that is being sharded. + +All three steps may be performed with one sub-command:: + + $ swift-manage-shard-ranges find_and_replace 500000 --enable --force + Loaded db broker for AUTH_test/c1. + No shard ranges found to delete. + Injected 7 shard ranges. + Run container-replicator to replicate them to other nodes. + Container moved to state 'sharding' with epoch 1525345669.46153. + Run container-sharder on all nodes to shard the container. + +.. _sharder_daemon: + +``container-sharder`` daemon +---------------------------- + +Once sharding has been enabled for a container, the act of sharding is +performed by the :ref:`container-sharder`. The :ref:`container-sharder` daemon +must be running on all container servers. The ``container-sharder`` daemon +periodically visits each container database to perform any container sharding +tasks that are required. + +The ``container-sharder`` daemon requires a ``[container-sharder]`` config +section to exist in the container server configuration file; a sample config +section is shown in the `container-server.conf-sample` file. + +.. 
note:: + + Several of the ``[container-sharder]`` config options are only significant + when the ``auto_shard`` option is enabled. This option enables the + ``container-sharder`` daemon to automatically identify containers that are + candidates for sharding and initiate the sharding process, instead of using + the ``swift-manage-shard-ranges`` tool. The ``auto_shard`` option is + currently NOT recommended for production systems and should be set to + ``false`` (the default value). + +The container sharder uses an internal client and therefore requires an +internal client configuration file to exist. By default the internal-client +configuration file is expected to be found at +`/etc/swift/internal-client.conf`. An alternative location for the +configuration file may be specified using the ``internal_client_conf_path`` +option in the ``[container-sharder]`` config section. + +The content of the internal-client configuration file should be the same as the +`internal-client.conf-sample` file. In particular, the internal-client +configuration should have:: + + account_autocreate = True + +in the ``[proxy-server]`` section. + +A container database may require several visits by the ``container-sharder`` +daemon before it is fully sharded. On each visit the ``container-sharder`` +daemon will move a subset of object records to new shard containers by cleaving +new shard container databases from the original. By default, two shards are +processed per visit; this number may be configured by the ``cleave_batch_size`` +option. + +The ``container-sharder`` daemon periodically writes progress data for +containers that are being sharded to recon cache. For example:: + + "sharding_in_progress": { + "all": [ + { + "account": "AUTH_test", + "active": 0, + "cleaved": 2, + "container": "c1", + "created": 5, + "db_state": "sharding", + "error": null, + "file_size": 26624, + "found": 0, + "meta_timestamp": "1525349617.46235", + "node_index": 1, + "object_count": 3349030, + "path": , + "root": "AUTH_test/c1", + "state": "sharding" + } + ] + } + +This example indicates that from a total of 7 shard ranges, 2 have been cleaved +whereas 5 remain in created state waiting to be cleaved. + +Shard containers are created in an internal account and are not visible to clients. +By default, shard containers for an account ``AUTH_test`` are created in the +internal account ``.shards_AUTH_test``. + +Once a container has started sharding, object updates to that container may be +redirected to the shard container. The ``container-sharder`` daemon is also +responsible for sending updates of a shard's object count and bytes_used to the +original container so that aggregate object count and bytes used values can be +returned in responses to client requests. + +.. note:: + + The ``container-sharder`` daemon must continue to run on all container + servers in order for shard object stats updates to be generated. + + +-------------- +Under the hood +-------------- + +Terminology +----------- + +================== ================================================== +Name Description +================== ================================================== +Root container The original container that lives in the + user's account. It holds references to its + shard containers. +Retiring DB The original database file that is to be sharded. +Fresh DB A database file that will replace the retiring + database. +Shard range A range of the object namespace defined by a lower + bound and an upper bound.
+Shard container A container that holds object records for a shard + range. Shard containers exist in a hidden account + mirroring the user's account. +Misplaced objects Items that don't belong in a container's shard + range. These will be moved to their correct + location by the container-sharder. +Cleaving The act of moving object records within a shard + range to a shard container database. +Shrinking The act of merging a small shard container into + another shard container in order to delete the + small shard container. +Donor The shard range that is shrinking away. +Acceptor The shard range into which a donor is merged. +================== ================================================== + + +Finding shard ranges +-------------------- + +The end goal of sharding a container is to replace the original container +database, which has grown very large, with a number of shard container databases, +each of which is responsible for storing a range of the entire object +namespace. The first step towards achieving this is to identify an appropriate +set of contiguous object namespaces, known as shard ranges, each of which +contains a similarly sized portion of the container's current object content. + +Shard ranges cannot simply be selected by dividing the namespace uniformly, +because object names are not guaranteed to be distributed uniformly. If the +container were naively sharded into two shard ranges, one containing all +object names up to `m` and the other containing all object names beyond `m`, +then if all object names actually start with `o` the outcome would be an +extremely unbalanced pair of shard containers. + +It is also too simplistic to assume that every container that requires sharding +can be sharded into two. This might be the goal in the ideal world, but in +practice there will be containers that have grown very large and should be +sharded into many shards. Furthermore, the time required to find the exact +mid-point of the existing object names in a large SQLite database would +increase with container size. + +For these reasons, shard ranges of size `N` are found by searching for the +`Nth` object in the database table, sorted by object name, and then searching +for the `(2 * N)th` object, and so on until all objects have been searched. For +a container that has exactly `2N` objects, the end result is the same as +sharding the container at the midpoint of its object names. In practice +sharding would typically be enabled for containers with greater than `2N` objects, +and more than two shard ranges will be found, the last one probably containing +less than `N` objects. With containers having large multiples of `N` objects, +shard ranges can be identified in batches, which enables a more scalable solution. + +To illustrate this process, consider a very large container in a user account +``acct`` that is a candidate for sharding: + +.. image:: images/sharding_unsharded.svg + +The :ref:`swift-manage-shard-ranges` tool ``find`` sub-command searches the +object table for the `Nth` object whose name will become the upper bound of the +first shard range, and the lower bound of the second shard range. The lower +bound of the first shard range is the empty string. + +For the purposes of this example the first upper bound is `cat`: + +.. image:: images/sharding_scan_basic.svg + +:ref:`swift-manage-shard-ranges` continues to search the container to find +further shard ranges, with the final upper bound also being the empty string.
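This scanning strategy can be illustrated with a short sketch. It is not the code used by ``swift-manage-shard-ranges``, which works against the SQLite object table; it is a minimal Python illustration, under the simplifying assumption that the sorted object names are available as a list::

    def find_shard_bounds(sorted_object_names, shard_size):
        # Take every Nth name as an upper bound; each upper bound becomes the
        # lower bound of the next shard range.
        bounds = []
        lower = ''                     # first range starts at the empty string
        pos = shard_size - 1
        while pos < len(sorted_object_names) - 1:
            upper = sorted_object_names[pos]
            bounds.append((lower, upper))
            lower = upper
            pos += shard_size
        bounds.append((lower, ''))     # final range runs to the end of the namespace
        return bounds

    # 7 objects with shard_size=3 gives ranges of 3, 3 and 1 objects:
    # [('', 'c'), ('c', 'f'), ('f', '')]
    print(find_shard_bounds(['a', 'b', 'c', 'd', 'e', 'f', 'g'], 3))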
+ +Enabling sharding +----------------- + +Once shard ranges have been found, the :ref:`swift-manage-shard-ranges` +``replace`` sub-command is used to insert them into the `shard_ranges` table +of the container database. In addition to its lower and upper bounds, each +shard range is given a name. The name takes the form ``a/c`` where ``a`` is an +account name formed by prefixing the user account with the string +``.shards_``, and ``c`` is a container name that is derived from the original +container and includes the index of the shard range. The final container name +for the shard range uses the pattern of ``{original container name}-{hash of +parent container}-{timestamp}-{shard index}``. + +The ``enable`` sub-command then creates some final state required to initiate +sharding the container, including a special shard range record referred to as +the container's `own_shard_range` whose name is equal to the container's path. +This is used to keep a record of the object namespace that the container +covers, which for user containers is always the entire namespace. + +The :class:`~swift.common.utils.ShardRange` class +------------------------------------------------- + +The :class:`~swift.common.utils.ShardRange` class provides methods for +interacting with the attributes and state of a shard range. The class +encapsulates the following properties: + +* The name of the shard range which is also the name of the shard container + used to hold object records in its namespace. +* Lower and upper bounds which define the object namespace of the shard range. +* A deleted flag. +* A timestamp at which the bounds and deleted flag were last modified. +* The object stats for the shard range, i.e. object count and bytes used. +* A timestamp at which the object stats were last modified. +* The state of the shard range, and an epoch, which is the timestamp used in + the shard container's database file name. +* A timestamp at which the state and epoch were last modified. + +A shard range progresses through the following states: + +* FOUND: the shard range has been identified in the container that is to be + sharded but no resources have been created for it. +* CREATED: a shard container has been created to store the contents of the + shard range. +* CLEAVED: the sharding container's contents for the shard range have been + copied to the shard container from *at least one replica* of the sharding + container. +* ACTIVE: shard ranges move to this state when all shard ranges in a sharding + container have been cleaved. +* SHRINKING: the shard range has been enabled for shrinking; or +* SHARDING: the shard range has been enabled for sharding. +* SHARDED: the shard range has completed sharding or shrinking. + +.. note:: + + Shard range state represents the most advanced state of the shard range on + any replica of the container. For example, a shard range in CLEAVED state + may not have completed cleaving on all replicas but has cleaved on at least + one replica. + +Fresh and retiring database files +--------------------------------- + +As alluded to earlier, writing to a large container causes increased latency +for the container servers. Once sharding has been initiated on a container, it +is desirable to stop writing to the large database; ultimately it will be +unlinked.
This is primarily achieved by redirecting object updates to new shard +containers as they are created (see :ref:`redirecting_updates` below), but some +object updates may still need to be accepted by the root container and other +container metadata must still be modifiable. + +To render the large `retiring` database effectively read-only, when the +:ref:`sharder_daemon` finds a container with a set of shard range records, +including an `own_shard_range`, it first creates a fresh database file which +will ultimately replace the existing `retiring` database. For a retiring db +whose filename is:: + + <hash>.db + +the fresh database file name is of the form:: + + <hash>_<epoch>.db + +where epoch is a timestamp stored in the container's `own_shard_range`. + +The fresh DB has a copy of the shard ranges table from the retiring DB and all +other container metadata apart from the object records. Once a fresh DB file +has been created, it is used to store any new object updates and no more object +records are written to the retiring DB file. + +Once the sharding process has completed, the retiring DB file will be unlinked, +leaving only the fresh DB file in the container's directory. There are +therefore three states that the container DB directory may be in during the +sharding process: UNSHARDED, SHARDING and SHARDED. + +.. image:: images/sharding_db_states.svg + +If the container ever shrinks to the point that it has no shards then the fresh +DB starts to store object records, behaving the same as an unsharded container. +This is known as the COLLAPSED state. + +In summary, the DB states that any container replica may be in are: + +- UNSHARDED - In this state there is just one standard container database. All + containers are originally in this state. +- SHARDING - There are now two databases, the retiring database and a fresh + database. The fresh database stores any metadata, container level stats, + an object holding table, and a table that stores shard ranges. +- SHARDED - There is only one database, the fresh database, which has one or + more shard ranges in addition to its own shard range. The retiring database + has been unlinked. +- COLLAPSED - There is only one database, the fresh database, which has only + its own shard range and stores object records. + +.. note:: + + DB state is unique to each replica of a container and is not necessarily + synchronised with shard range state. + + +Creating shard containers +------------------------- + +The :ref:`sharder_daemon` next creates a shard container for each shard range +using the shard range name as the name of the shard container: + +.. image:: /images/sharding_cleave_basic.svg + +Shard containers now exist with a unique name and are placed in a hidden account +that maps to the user account (`.shards_acct`). This avoids namespace +collisions and also keeps all the shard containers out of view from users of +the account. Each shard container has an `own_shard_range` record which has the +lower and upper bounds of the object namespace for which it is responsible, and +a reference to the sharding user container, which is referred to as the +`root_container`. Unlike the `root_container`, the shard container's +`own_shard_range` does not cover the entire namespace.
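The bounds and hidden-account naming just described can be summarised in a short sketch. This is not the real :class:`~swift.common.utils.ShardRange` implementation; it is an illustrative helper, assuming the convention that a shard range owns names greater than its lower bound and up to and including its upper bound, with an empty bound meaning unbounded::

    def in_shard_range(obj_name, lower, upper):
        # '' means the start of the namespace (lower) or the end (upper)
        after_lower = (lower == '') or (obj_name > lower)
        at_or_before_upper = (upper == '') or (obj_name <= upper)
        return after_lower and at_or_before_upper

    def shards_account(user_account, prefix='.shards_'):
        # shard containers live in a hidden account mirroring the user account
        return prefix + user_account

    assert in_shard_range('dog', 'cat', 'giraffe')
    assert not in_shard_range('cat', 'cat', 'giraffe')  # 'cat' is in ''-'cat'
    assert shards_account('acct') == '.shards_acct'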
+ +Cleaving shard containers +------------------------- + +Having created empty shard containers, the sharder daemon will proceed to cleave +objects from the retiring database to each shard range. Cleaving occurs in +batches of two (by default) shard ranges, so if a container has more than two +shard ranges then the daemon must visit it multiple times to complete cleaving. + +To cleave a shard range, the daemon creates a shard database for the shard +container on a local device. This device may be one of the shard container's +primary nodes but often it will not be. Object records from the corresponding +shard range namespace are then copied from the retiring DB to this shard DB. + +Swift's container replication mechanism is then used to replicate the shard DB +to its primary nodes. Checks are made to ensure that the new shard container DB +has been replicated to a sufficient number of its primary nodes before it is +considered to have been successfully cleaved. By default the daemon requires +successful replication of a new shard broker to at least a quorum of the +container ring's replica count, but this requirement can be tuned using the +``shard_replication_quorum`` option. + +Once a shard range has been successfully cleaved from a retiring database, the +daemon transitions its state to ``CLEAVED``. It should be noted that this state +transition occurs as soon as any one of the retiring DB replicas has cleaved +the shard range, and therefore does not imply that all retiring DB replicas +have cleaved that range. The significance of the state transition is that the +shard container is now considered suitable for contributing to object listings, +since its contents are present on a quorum of its primary nodes and are the +same as at least one of the retiring DBs for that namespace. + +Once a shard range is in the ``CLEAVED`` state, the requirement for +'successful' cleaving of other instances of the retiring DB may optionally be +relaxed since it is not so imperative that their contents are replicated +*immediately* to their primary nodes. The ``existing_shard_replication_quorum`` +option can be used to reduce the quorum required for a cleaved shard range to +be considered successfully replicated by the sharder daemon. + +.. note:: + + Once cleaved, shard container DBs will continue to be replicated by the + normal `container-replicator` daemon so that they will eventually be fully + replicated to all primary nodes regardless of any replication quorum options + used by the sharder daemon. + +The cleaving progress of each replica of a retiring DB must be +tracked independently of the shard range state. This is done using a per-DB +CleavingContext object that maintains a cleaving cursor for the retiring DB +that it is associated with. The cleaving cursor is simply the upper bound of +the last shard range to have been cleaved *from that particular retiring DB*. + +Each CleavingContext is stored in the sharding container's sysmeta under a key +that is the ``id`` of the retiring DB. Since all container DB files have unique +``id``s, this guarantees that each retiring DB will have a unique +CleavingContext. Furthermore, if the retiring DB file is changed, for example +by an rsync_then_merge replication operation which might change the contents of +the DB's object table, then it will get a new unique CleavingContext. + +A CleavingContext maintains other state that is used to ensure that a retiring +DB is only considered to be fully cleaved, and ready to be deleted, if *all* of +its object rows have been cleaved to a shard range. + +Once all shard ranges have been cleaved from the retiring DB it is deleted. The +container is now represented by the fresh DB, which has a table of shard range +records that point to the shard containers that store the container's object +records.
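The cursor bookkeeping described above can be pictured with a minimal sketch. This is not the real CleavingContext class; it is a hypothetical stand-in showing how a per-DB cursor lets the daemon work in batches and resume where it left off::

    END = '\xff' * 16   # stand-in sentinel for the unbounded final upper bound

    class CleavingCursor(object):
        # Hypothetical stand-in for the per-DB CleavingContext.
        def __init__(self):
            self.cursor = ''   # upper bound of the last range cleaved so far

        def next_batch(self, shard_ranges, batch_size=2):
            # shard_ranges are sorted by namespace; skip ranges already cleaved
            todo = [sr for sr in shard_ranges
                    if (sr['upper'] or END) > self.cursor]
            return todo[:batch_size]

        def record_cleaved(self, shard_range):
            self.cursor = shard_range['upper'] or END

    ranges = [{'upper': u} for u in ('cat', 'giraffe', 'igloo', '')]
    ctx = CleavingCursor()
    for sr in ctx.next_batch(ranges):   # first visit cleaves two ranges
        ctx.record_cleaved(sr)
    print(ctx.cursor)                   # 'giraffe'; the next visit resumes there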
+ +.. _redirecting_updates: + +Redirecting object updates +-------------------------- + +Once a shard container exists, object updates arising from new client requests +and async pending files are directed to the shard container instead of the root +container. This takes load off the root container. + +For a sharded (or partially sharded) container, when the proxy receives a new +object request, it issues a GET request to the container for data describing a +shard container to which the object update should be sent. The proxy then +annotates the object request with the shard container location so that the +object server will forward object updates to the shard container. If those +updates fail then the async pending file that is written on the object server +contains the shard container location. + +When the object updater processes async pending files for previously failed +object updates, it may not find a shard container location. In this case the +updater sends the update to the `root container`, which returns a redirection +response with the shard container location. + +.. note:: + + Object updates are directed to shard containers as soon as they exist, even + if the retiring DB object records have not yet been cleaved to the shard + container. This prevents further writes to the retiring DB and also avoids + the fresh DB being polluted by new object updates. The goal is to + ultimately have all object records in the shard containers and none in the + root container. + +Building container listings +--------------------------- + +Listing requests for a sharded container are handled by querying the shard +containers for components of the listing. The proxy forwards the client listing +request to the root container, as it would for an unsharded container, but the +container server responds with a list of shard ranges rather than objects. The +proxy then queries each shard container in namespace order for its listing, +until either the listing length limit is reached or all shard ranges have been +listed. + +While a container is still in the process of sharding, only *cleaved* shard +ranges are used when building a container listing. Shard ranges that have not +yet been cleaved will not have any object records from the root container. The root +container continues to provide listings for the uncleaved part of its +namespace. + +.. note:: + + New object updates are redirected to shard containers that have not yet been + cleaved. These updates will therefore not be included in container listings + until their shard range has been cleaved. + +Example request redirection +--------------------------- + +As an example, consider a sharding container in which 3 shard ranges have been +found ending in cat, giraffe and igloo. Their respective shard containers have +been created, so update requests for objects up to "igloo" are redirected to the +appropriate shard container. The root DB continues to handle listing requests +and update requests for any object name beyond "igloo". + +.. image:: images/sharding_scan_load.svg
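Matching an update to a shard container can be sketched as a lookup over the upper bounds of the shard ranges created so far. This is only an illustration of the redirection rule using the example above, not the proxy's actual code; the container names and the <ts> placeholder mirror those used in the diagrams::

    import bisect

    # Shard ranges created so far, sorted by upper bound; names beyond 'igloo'
    # are not yet covered by any shard range.
    uppers = ['cat', 'giraffe', 'igloo']
    shards = ['.shards_acct/cont-568d8e-<ts>-0',
              '.shards_acct/cont-750ed3-<ts>-1',
              '.shards_acct/cont-4ec28d-<ts>-2']

    def update_target(obj_name, root='acct/cont'):
        # first shard range whose (inclusive) upper bound is >= the object name
        i = bisect.bisect_left(uppers, obj_name)
        return shards[i] if i < len(uppers) else root

    print(update_target('dog'))    # .shards_acct/cont-750ed3-<ts>-1
    print(update_target('zebra'))  # acct/cont - still handled by the root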
+ +The sharder daemon cleaves objects from the retiring DB to the shard range DBs; +it also moves any misplaced objects from the root container's fresh DB to the +shard DB. Cleaving progress is represented by the blue line. Once the first +shard range has been cleaved, listing requests for that namespace are directed +to the shard container. The root container still provides listings for the +remainder of the namespace. + +.. image:: images/sharding_cleave1_load.svg + +The process continues: the sharder cleaves the next range and a new range is +found with an upper bound of "linux". Now the root container only needs to handle +listing requests up to "giraffe" and update requests for objects whose name is +greater than "linux". Load will continue to diminish on the root DB and be +dispersed across the shard DBs. + +.. image:: images/sharding_cleave2_load.svg + + +Container replication +--------------------- + +Shard range records are replicated between container DB replicas in much the +same way as object records are for unsharded containers. However, the usual +replication of object records between replicas of a container is halted as soon +as a container is capable of being sharded. Instead, object records are moved +to their new locations in shard containers. This avoids unnecessary replication +traffic between container replicas. + +To facilitate this, shard ranges are both 'pushed' and 'pulled' during +replication, prior to any attempt to replicate objects. This means that the +node initiating replication learns about shard ranges from the destination node +early during the replication process and is able to skip object replication if +it discovers that it has shard ranges and is able to shard. + +.. note:: + + When the destination DB for container replication is missing then the + 'complete_rsync' replication mechanism is still used and in this case + both object records and shard range records are copied to the destination + node. + +Container deletion +------------------ + +Sharded containers may be deleted by a ``DELETE`` request just like an +unsharded container. A sharded container must be empty before it can be deleted, +which implies that all of its shard containers must have reported that they are +empty. + +Shard containers are *not* immediately deleted when their root container is +deleted; the shard containers remain undeleted so that they are able to +continue to receive object updates that might arrive after the root container +has been deleted. Shard containers continue to update their deleted root +container with their object stats. If a shard container does receive object +updates that cause it to no longer be empty then the root container will no +longer be considered deleted once that shard container sends an object stats +update. + + +Sharding a shard container +-------------------------- + +A shard container may grow to a size that requires it to be sharded. +``swift-manage-shard-ranges`` may be used to identify shard ranges within a +shard container and enable sharding in the same way as for a root container. +When a shard is sharding, it notifies the root of its shard ranges so that the +root can start to redirect object updates to the new 'sub-shards'. When the +shard has completed sharding, the root is aware of all the new sub-shards and +the sharding shard deletes its shard range record in the root container shard +ranges table. At this point the root is aware of all the new sub-shards, which +collectively cover the namespace of the now-deleted shard. + +There is no hierarchy of shards beyond the root and its immediate shards. When +a shard shards, its sub-shards are effectively re-parented with the root +container.
+ + +Shrinking a shard container +--------------------------- + +A shard's contents may reduce to a point where the shard is no longer required. +If this happens then the shard may be shrunk into another shard range. +Shrinking is achieved in a similar way to sharding: an 'acceptor' shard range +is written to the shrinking shard container's shard ranges table; unlike +sharding, where shard ranges each cover a subset of the sharding container's +namespace, the acceptor shard range is a superset of the shrinking shard range. + +Once given an acceptor shard range the shrinking shard will cleave itself to +its acceptor, and then delete itself from the root container shard ranges +table. diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index 4059e39418..7d38deb0c5 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -69,6 +69,10 @@ bind_port = 6201 # Work only with ionice_class. # ionice_class = # ionice_priority = +# +# The prefix used for hidden auto-created accounts, for example accounts in +# which shard containers are created. Defaults to '.'. +# auto_create_account_prefix = . [pipeline:main] pipeline = healthcheck recon container-server @@ -323,3 +327,117 @@ use = egg:swift#xprofile # # unwind the iterator of applications # unwind = false + +[container-sharder] +# You can override the default log routing for this app here (don't use set!): +# log_name = container-sharder +# log_facility = LOG_LOCAL0 +# log_level = INFO +# log_address = /dev/log +# +# Container sharder specific settings +# +# If the auto_shard option is true then the sharder will automatically select +# containers to shard, scan for shard ranges, and select shards to shrink. +# The default is false. +# Warning: auto-sharding is still under development and should not be used in +# production; do not set this option to true in a production cluster. +# auto_shard = false +# +# When auto-sharding is enabled shard_container_threshold defines the object +# count at which a container with container-sharding enabled will start to +# shard. shard_container_threshold also indirectly determines the initial +# nominal size of shard containers, which is shard_container_threshold // 2, as +# well as determining the thresholds for shrinking and merging shard +# containers. +# shard_container_threshold = 1000000 +# +# When auto-sharding is enabled shard_shrink_point defines the object count +# below which a 'donor' shard container will be considered for shrinking into +# another 'acceptor' shard container. shard_shrink_point is a percentage of +# shard_container_threshold e.g. the default value of 5 means 5% of the +# shard_container_threshold. +# shard_shrink_point = 5 +# +# When auto-sharding is enabled shard_shrink_merge_point defines the maximum +# allowed size of an acceptor shard container after having a donor merged into +# it. Shard_shrink_merge_point is a percentage of shard_container_threshold. +# e.g. the default value of 75 means that the projected sum of a donor object +# count and acceptor count must be less than 75% of shard_container_threshold +# for the donor to be allowed to merge into the acceptor. +# +# For example, if the shard_container_threshold is 1 million, +# shard_shrink_point is 5, and shard_shrink_merge_point is 75 then a shard will +# be considered for shrinking if it has less than or equal to 50 thousand +# objects but will only merge into an acceptor if the combined object count +# would be less than or equal to 750 thousand objects. 
+# shard_shrink_merge_point = 75 +# +# When auto-sharding is enabled shard_scanner_batch_size defines the maximum +# number of shard ranges that will be found each time the sharder daemon visits +# a sharding container. If necessary, the sharder daemon will continue to search +# for more shard ranges each time it visits the container. +# shard_scanner_batch_size = 10 +# +# cleave_batch_size defines the number of shard ranges that will be cleaved +# each time the sharder daemon visits a sharding container. +# cleave_batch_size = 2 +# +# cleave_row_batch_size defines the size of batches of object rows read from a +# sharding container and merged to a shard container during cleaving. +# cleave_row_batch_size = 10000 +# +# Defines the number of successfully replicated shard dbs required when +# cleaving a previously uncleaved shard range before the sharder will progress +# to the next shard range. The value should be less than or equal to the +# container ring replica count. The default of 'auto' causes the container ring +# quorum value to be used. This option only applies to the container-sharder +# replication and does not affect the number of shard container replicas that +# will eventually be replicated by the container-replicator. +# shard_replication_quorum = auto +# +# Defines the number of successfully replicated shard dbs required when +# cleaving a shard range that has been previously cleaved on another node +# before the sharder will progress to the next shard range. The value should be +# less than or equal to the container ring replica count. The default of 'auto' +# causes the shard_replication_quorum value to be used. This option only +# applies to the container-sharder replication and does not affect the number +# of shard container replicas that will eventually be replicated by the +# container-replicator. +# existing_shard_replication_quorum = auto +# +# The sharder uses an internal client to create and make requests to +# containers. The absolute path to the client config file can be configured. +# internal_client_conf_path = /etc/swift/internal-client.conf +# +# The number of times the internal client will retry requests. +# request_tries = 3 +# +# Each time the sharder dumps stats to the recon cache file it includes a list +# of containers that appear to need sharding but are not yet sharding. By +# default this list is limited to the top 5 containers, ordered by object +# count. The limit may be changed by setting recon_candidates_limit to an +# integer value. A negative value implies no limit. +# recon_candidates_limit = 5 +# +# Large databases tend to take a while to work with, but we want to make sure +# we write down our progress. Use a larger-than-normal broker timeout to make +# us less likely to bomb out on a LockTimeout.
+# broker_timeout = 60 +# +# Time in seconds to wait between sharder cycles +# interval = 30 +# +# The container-sharder accepts the following configuration options as defined +# in the container-replicator section: +# +# per_diff = 1000 +# max_diffs = 100 +# concurrency = 8 +# node_timeout = 10 +# conn_timeout = 0.5 +# reclaim_age = 604800 +# rsync_compress = no +# rsync_module = {replication_ip}::container +# recon_cache_path = /var/cache/swift +# diff --git a/setup.cfg b/setup.cfg index 7ed7f1ec17..bc6b1a07c0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,6 +36,7 @@ scripts = bin/swift-container-info bin/swift-container-replicator bin/swift-container-server + bin/swift-container-sharder bin/swift-container-sync bin/swift-container-updater bin/swift-container-reconciler @@ -71,6 +72,9 @@ keystone = keystonemiddleware>=4.17.0 [entry_points] +console_scripts = + swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main + paste.app_factory = proxy = swift.proxy.server:app_factory object = swift.obj.server:app_factory diff --git a/swift/account/backend.py b/swift/account/backend.py index 2734548cf0..1ff940d4f6 100644 --- a/swift/account/backend.py +++ b/swift/account/backend.py @@ -22,7 +22,7 @@ import six.moves.cPickle as pickle import sqlite3 from swift.common.utils import Timestamp -from swift.common.db import DatabaseBroker, utf8encode +from swift.common.db import DatabaseBroker, utf8encode, zero_like DATADIR = 'accounts' @@ -233,7 +233,7 @@ class AccountBroker(DatabaseBroker): with self.get() as conn: row = conn.execute( 'SELECT container_count from account_stat').fetchone() - return (row[0] == 0) + return zero_like(row[0]) def make_tuple_for_pickle(self, record): return (record['name'], record['put_timestamp'], @@ -254,7 +254,7 @@ class AccountBroker(DatabaseBroker): :param storage_policy_index: the storage policy for this container """ if Timestamp(delete_timestamp) > Timestamp(put_timestamp) and \ - object_count in (None, '', 0, '0'): + zero_like(object_count): deleted = 1 else: deleted = 0 @@ -273,8 +273,7 @@ class AccountBroker(DatabaseBroker): :returns: True if the DB is considered to be deleted, False otherwise """ - return status == 'DELETED' or ( - container_count in (None, '', 0, '0') and + return status == 'DELETED' or zero_like(container_count) and ( Timestamp(delete_timestamp) > Timestamp(put_timestamp)) def _is_deleted(self, conn): @@ -509,7 +508,7 @@ class AccountBroker(DatabaseBroker): record[2] = row[2] # If deleted, mark as such if Timestamp(record[2]) > Timestamp(record[1]) and \ - record[3] in (None, '', 0, '0'): + zero_like(record[3]): record[5] = 1 else: record[5] = 0 diff --git a/swift/cli/info.py b/swift/cli/info.py index 0eee781ba6..1969435285 100644 --- a/swift/cli/info.py +++ b/swift/cli/info.py @@ -298,6 +298,27 @@ def print_db_info_metadata(db_type, info, metadata, drop_prefixes=False): else: print('No user metadata found in db file') + if db_type == 'container': + print('Sharding Metadata:') + shard_type = 'root' if info['is_root'] else 'shard' + print(' Type: %s' % shard_type) + print(' State: %s' % info['db_state']) + if info.get('shard_ranges'): + print('Shard Ranges (%d):' % len(info['shard_ranges'])) + for srange in info['shard_ranges']: + srange = dict(srange, state_text=srange.state_text) + print(' Name: %(name)s' % srange) + print(' lower: %(lower)r, upper: %(upper)r' % srange) + print(' Object Count: %(object_count)d, Bytes Used: ' + '%(bytes_used)d, State: %(state_text)s (%(state)d)' + % srange) + print(' Created at: %s (%s)' + % 
(Timestamp(srange['timestamp']).isoformat, + srange['timestamp'])) + print(' Meta Timestamp: %s (%s)' + % (Timestamp(srange['meta_timestamp']).isoformat, + srange['meta_timestamp'])) + def print_obj_metadata(metadata, drop_prefixes=False): """ @@ -406,7 +427,13 @@ def print_info(db_type, db_file, swift_dir='/etc/swift', stale_reads_ok=False, raise InfoSystemExit() raise account = info['account'] - container = info['container'] if db_type == 'container' else None + container = None + if db_type == 'container': + container = info['container'] + info['is_root'] = broker.is_root_container() + sranges = broker.get_shard_ranges() + if sranges: + info['shard_ranges'] = sranges print_db_info_metadata(db_type, info, broker.metadata, drop_prefixes) try: ring = Ring(swift_dir, ring_name=db_type) diff --git a/swift/cli/manage_shard_ranges.py b/swift/cli/manage_shard_ranges.py new file mode 100644 index 0000000000..acbc364968 --- /dev/null +++ b/swift/cli/manage_shard_ranges.py @@ -0,0 +1,370 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy +# of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from __future__ import print_function +import argparse +import json +import sys +import time + +from six.moves import input + +from swift.common.utils import Timestamp, get_logger, ShardRange +from swift.container.backend import ContainerBroker, UNSHARDED +from swift.container.sharder import make_shard_ranges, sharding_enabled, \ + CleavingContext + + +def _load_and_validate_shard_data(args): + try: + with open(args.input, 'rb') as fd: + try: + data = json.load(fd) + if not isinstance(data, list): + raise ValueError('Shard data must be a list of dicts') + for k in ('lower', 'upper', 'index', 'object_count'): + for shard in data: + shard[k] + return data + except (TypeError, ValueError, KeyError) as err: + print('Failed to load valid shard range data: %r' % err, + file=sys.stderr) + exit(2) + except IOError as err: + print('Failed to open file %s: %s' % (args.input, err), + file=sys.stderr) + exit(2) + + +def _check_shard_ranges(own_shard_range, shard_ranges): + reasons = [] + + def reason(x, y): + if x != y: + reasons.append('%s != %s' % (x, y)) + + if not shard_ranges: + reasons.append('No shard ranges.') + else: + reason(own_shard_range.lower, shard_ranges[0].lower) + reason(own_shard_range.upper, shard_ranges[-1].upper) + for x, y in zip(shard_ranges, shard_ranges[1:]): + reason(x.upper, y.lower) + + if reasons: + print('WARNING: invalid shard ranges: %s.' % reasons) + print('Aborting.') + exit(2) + + +def _check_own_shard_range(broker, args): + # TODO: this check is weak - if the shards prefix changes then we may not + # identify a shard container. The goal is to not inadvertently create an + # entire namespace default shard range for a shard container. 
+ is_shard = broker.account.startswith(args.shards_account_prefix) + own_shard_range = broker.get_own_shard_range(no_default=is_shard) + if not own_shard_range: + print('WARNING: shard container missing own shard range.') + print('Aborting.') + exit(2) + return own_shard_range + + +def _find_ranges(broker, args, status_file=None): + start = last_report = time.time() + limit = 5 if status_file else -1 + shard_data, last_found = broker.find_shard_ranges( + args.rows_per_shard, limit=limit) + if shard_data: + while not last_found: + if last_report + 10 < time.time(): + print('Found %d ranges in %gs; looking for more...' % ( + len(shard_data), time.time() - start), file=status_file) + last_report = time.time() + # prefix doesn't matter since we aren't persisting it + found_ranges = make_shard_ranges(broker, shard_data, '.shards_') + more_shard_data, last_found = broker.find_shard_ranges( + args.rows_per_shard, existing_ranges=found_ranges, limit=5) + shard_data.extend(more_shard_data) + return shard_data, time.time() - start + + +def find_ranges(broker, args): + shard_data, delta_t = _find_ranges(broker, args, sys.stderr) + print(json.dumps(shard_data, sort_keys=True, indent=2)) + print('Found %d ranges in %gs (total object count %s)' % + (len(shard_data), delta_t, + sum(r['object_count'] for r in shard_data)), + file=sys.stderr) + return 0 + + +def show_shard_ranges(broker, args): + shard_ranges = broker.get_shard_ranges( + include_deleted=getattr(args, 'include_deleted', False)) + shard_data = [dict(sr, state=sr.state_text) + for sr in shard_ranges] + + if not shard_data: + print("No shard data found.", file=sys.stderr) + elif getattr(args, 'brief', False): + print("Existing shard ranges:", file=sys.stderr) + print(json.dumps([(sd['lower'], sd['upper']) for sd in shard_data], + sort_keys=True, indent=2)) + else: + print("Existing shard ranges:", file=sys.stderr) + print(json.dumps(shard_data, sort_keys=True, indent=2)) + return 0 + + +def db_info(broker, args): + print('Sharding enabled = %s' % sharding_enabled(broker)) + own_sr = broker.get_own_shard_range(no_default=True) + print('Own shard range: %s' % + (json.dumps(dict(own_sr, state=own_sr.state_text), + sort_keys=True, indent=2) + if own_sr else None)) + db_state = broker.get_db_state() + print('db_state = %s' % db_state) + if db_state == 'sharding': + print('Retiring db id: %s' % broker.get_brokers()[0].get_info()['id']) + print('Cleaving context: %s' % + json.dumps(dict(CleavingContext.load(broker)), + sort_keys=True, indent=2)) + print('Metadata:') + for k, (v, t) in broker.metadata.items(): + print(' %s = %s' % (k, v)) + + +def delete_shard_ranges(broker, args): + shard_ranges = broker.get_shard_ranges() + if not shard_ranges: + print("No shard ranges found to delete.") + return 0 + + while not args.force: + print('This will delete existing %d shard ranges.' % len(shard_ranges)) + if broker.get_db_state() != UNSHARDED: + print('WARNING: Be very cautious about deleting existing shard ' + 'ranges. Deleting all ranges in this db does not guarantee ' + 'deletion of all ranges on all replicas of the db.') + print(' - this db is in state %s' % broker.get_db_state()) + print(' - %d existing shard ranges have started sharding' % + [sr.state != ShardRange.FOUND + for sr in shard_ranges].count(True)) + choice = input('Do you want to show the existing ranges [s], ' + 'delete the existing ranges [yes] ' + 'or quit without deleting [q]? 
') + if choice == 's': + show_shard_ranges(broker, args) + continue + elif choice == 'q': + return 1 + elif choice == 'yes': + break + else: + print('Please make a valid choice.') + print() + + now = Timestamp.now() + for sr in shard_ranges: + sr.deleted = 1 + sr.timestamp = now + broker.merge_shard_ranges(shard_ranges) + print('Deleted %s existing shard ranges.' % len(shard_ranges)) + return 0 + + +def _replace_shard_ranges(broker, args, shard_data, timeout=None): + own_shard_range = _check_own_shard_range(broker, args) + shard_ranges = make_shard_ranges( + broker, shard_data, args.shards_account_prefix) + _check_shard_ranges(own_shard_range, shard_ranges) + + if args.verbose > 0: + print('New shard ranges to be injected:') + print(json.dumps([dict(sr) for sr in shard_ranges], + sort_keys=True, indent=2)) + + # Crank up the timeout in an effort to *make sure* this succeeds + with broker.updated_timeout(max(timeout, args.replace_timeout)): + delete_shard_ranges(broker, args) + broker.merge_shard_ranges(shard_ranges) + + print('Injected %d shard ranges.' % len(shard_ranges)) + print('Run container-replicator to replicate them to other nodes.') + if args.enable: + return enable_sharding(broker, args) + else: + print('Use the enable sub-command to enable sharding.') + return 0 + + +def replace_shard_ranges(broker, args): + shard_data = _load_and_validate_shard_data(args) + return _replace_shard_ranges(broker, args, shard_data) + + +def find_replace_shard_ranges(broker, args): + shard_data, delta_t = _find_ranges(broker, args, sys.stdout) + # Since we're trying to one-shot this, and the previous step probably + # took a while, make the timeout for writing *at least* that long + return _replace_shard_ranges(broker, args, shard_data, timeout=delta_t) + + +def _enable_sharding(broker, own_shard_range, args): + if own_shard_range.update_state(ShardRange.SHARDING): + own_shard_range.epoch = Timestamp.now() + own_shard_range.state_timestamp = own_shard_range.epoch + + with broker.updated_timeout(args.enable_timeout): + broker.merge_shard_ranges([own_shard_range]) + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('True', Timestamp.now().normal)}) + return own_shard_range + + +def enable_sharding(broker, args): + own_shard_range = _check_own_shard_range(broker, args) + _check_shard_ranges(own_shard_range, broker.get_shard_ranges()) + + if own_shard_range.state == ShardRange.ACTIVE: + own_shard_range = _enable_sharding(broker, own_shard_range, args) + print('Container moved to state %r with epoch %s.' % + (own_shard_range.state_text, own_shard_range.epoch.internal)) + elif own_shard_range.state == ShardRange.SHARDING: + if own_shard_range.epoch: + print('Container already in state %r with epoch %s.' % + (own_shard_range.state_text, own_shard_range.epoch.internal)) + print('No action required.') + else: + print('Container already in state %r but missing epoch.' % + own_shard_range.state_text) + own_shard_range = _enable_sharding(broker, own_shard_range, args) + print('Container in state %r given epoch %s.' % + (own_shard_range.state_text, own_shard_range.epoch.internal)) + else: + print('WARNING: container in state %s (should be active or sharding).' 
+ % own_shard_range.state_text) + print('Aborting.') + return 2 + + print('Run container-sharder on all nodes to shard the container.') + return 0 + + +def _add_find_args(parser): + parser.add_argument('rows_per_shard', nargs='?', type=int, default=500000) + + +def _add_replace_args(parser): + parser.add_argument( + '--shards_account_prefix', metavar='shards_account_prefix', type=str, + required=False, help='Prefix for shards account', default='.shards_') + parser.add_argument( + '--replace-timeout', type=int, default=600, + help='Minimum DB timeout to use when replacing shard ranges.') + parser.add_argument( + '--force', '-f', action='store_true', default=False, + help='Delete existing shard ranges; no questions asked.') + parser.add_argument( + '--enable', action='store_true', default=False, + help='Enable sharding after adding shard ranges.') + + +def _add_enable_args(parser): + parser.add_argument( + '--enable-timeout', type=int, default=300, + help='DB timeout to use when enabling sharding.') + + +def _make_parser(): + parser = argparse.ArgumentParser(description='Manage shard ranges') + parser.add_argument('container_db') + parser.add_argument('--verbose', '-v', action='count', + help='Increase output verbosity') + subparsers = parser.add_subparsers( + help='Sub-command help', title='Sub-commands') + + # find + find_parser = subparsers.add_parser( + 'find', help='Find and display shard ranges') + _add_find_args(find_parser) + find_parser.set_defaults(func=find_ranges) + + # delete + delete_parser = subparsers.add_parser( + 'delete', help='Delete all existing shard ranges from db') + delete_parser.add_argument( + '--force', '-f', action='store_true', default=False, + help='Delete existing shard ranges; no questions asked.') + delete_parser.set_defaults(func=delete_shard_ranges) + + # show + show_parser = subparsers.add_parser( + 'show', help='Print shard range data') + show_parser.add_argument( + '--include_deleted', '-d', action='store_true', default=False, + help='Include deleted shard ranges in output.') + show_parser.add_argument( + '--brief', '-b', action='store_true', default=False, + help='Show only shard range bounds in output.') + show_parser.set_defaults(func=show_shard_ranges) + + # info + info_parser = subparsers.add_parser( + 'info', help='Print container db info') + info_parser.set_defaults(func=db_info) + + # replace + replace_parser = subparsers.add_parser( + 'replace', + help='Replace existing shard ranges. User will be prompted before ' + 'deleting any existing shard ranges.') + replace_parser.add_argument('input', metavar='input_file', + type=str, help='Name of file') + _add_replace_args(replace_parser) + replace_parser.set_defaults(func=replace_shard_ranges) + + # find_and_replace + find_replace_parser = subparsers.add_parser( + 'find_and_replace', + help='Find new shard ranges and replace existing shard ranges. ' + 'User will be prompted before deleting any existing shard ranges.' 
+ ) + _add_find_args(find_replace_parser) + _add_replace_args(find_replace_parser) + _add_enable_args(find_replace_parser) + find_replace_parser.set_defaults(func=find_replace_shard_ranges) + + # enable + enable_parser = subparsers.add_parser( + 'enable', help='Enable sharding and move db to sharding state.') + _add_enable_args(enable_parser) + enable_parser.set_defaults(func=enable_sharding) + _add_replace_args(enable_parser) + return parser + + +def main(args=None): + parser = _make_parser() + args = parser.parse_args(args) + logger = get_logger({}, name='ContainerBroker', log_to_console=True) + broker = ContainerBroker(args.container_db, logger=logger, + skip_commits=True) + broker.get_info() + print('Loaded db broker for %s.' % broker.path, file=sys.stderr) + return args.func(broker, args) + + +if __name__ == '__main__': + exit(main()) diff --git a/swift/cli/shard-info.py b/swift/cli/shard-info.py new file mode 100644 index 0000000000..01223787f7 --- /dev/null +++ b/swift/cli/shard-info.py @@ -0,0 +1,195 @@ +# Copyright (c) 2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from collections import defaultdict + +from swift.common import utils +from swift.common.db_replicator import roundrobin_datadirs +from swift.common.ring import ring +from swift.common.utils import Timestamp +from swift.container.backend import ContainerBroker, DATADIR + +TAB = ' ' + + +def broker_key(broker): + broker.get_info() + return broker.path + + +def container_type(broker): + return 'ROOT' if broker.is_root_container() else 'SHARD' + + +def collect_brokers(conf_path, names2nodes): + conf = utils.readconf(conf_path, 'container-replicator') + root = conf.get('devices', '/srv/node') + swift_dir = conf.get('swift_dir', '/etc/swift') + c_ring = ring.Ring(swift_dir, ring_name='container') + dirs = [] + brokers = defaultdict(dict) + for node in c_ring.devs: + if node is None: + continue + datadir = os.path.join(root, node['device'], DATADIR) + if os.path.isdir(datadir): + dirs.append((datadir, node['id'], lambda *args: True)) + for part, object_file, node_id in roundrobin_datadirs(dirs): + broker = ContainerBroker(object_file) + for node in c_ring.get_part_nodes(int(part)): + if node['id'] == node_id: + node_index = str(node['index']) + break + else: + node_index = 'handoff' + names2nodes[broker_key(broker)][(node_id, node_index)] = broker + return brokers + + +def print_broker_info(node, broker, indent_level=0): + indent = indent_level * TAB + info = broker.get_info() + raw_info = broker._get_info() + deleted_at = float(info['delete_timestamp']) + if deleted_at: + deleted_at = Timestamp(info['delete_timestamp']).isoformat + else: + deleted_at = ' - ' + print('%s(%s) %s, objs: %s, bytes: %s, actual_objs: %s, put: %s, ' + 'deleted: %s' % + (indent, node[1][0], broker.get_db_state(), + info['object_count'], info['bytes_used'], raw_info['object_count'], + Timestamp(info['put_timestamp']).isoformat, deleted_at)) + + +def print_db(node, broker, expect_type='ROOT', indent_level=0): + 
indent = indent_level * TAB + print('%s(%s) %s node id: %s, node index: %s' % + (indent, node[1][0], broker.db_file, node[0], node[1])) + actual_type = container_type(broker) + if actual_type != expect_type: + print('%s ERROR expected %s but found %s' % + (indent, expect_type, actual_type)) + + +def print_own_shard_range(node, sr, indent_level): + indent = indent_level * TAB + range = '%r - %r' % (sr.lower, sr.upper) + print('%s(%s) %23s, objs: %3s, bytes: %3s, timestamp: %s (%s), ' + 'modified: %s (%s), %7s: %s (%s), deleted: %s epoch: %s' % + (indent, node[1][0], range, sr.object_count, sr.bytes_used, + sr.timestamp.isoformat, sr.timestamp.internal, + sr.meta_timestamp.isoformat, sr.meta_timestamp.internal, + sr.state_text, sr.state_timestamp.isoformat, + sr.state_timestamp.internal, sr.deleted, + sr.epoch.internal if sr.epoch else None)) + + +def print_own_shard_range_info(node, shard_ranges, indent_level=0): + shard_ranges.sort(key=lambda x: x.deleted) + for sr in shard_ranges: + print_own_shard_range(node, sr, indent_level) + + +def print_shard_range(node, sr, indent_level): + indent = indent_level * TAB + range = '%r - %r' % (sr.lower, sr.upper) + print('%s(%s) %23s, objs: %3s, bytes: %3s, timestamp: %s (%s), ' + 'modified: %s (%s), %7s: %s (%s), deleted: %s %s' % + (indent, node[1][0], range, sr.object_count, sr.bytes_used, + sr.timestamp.isoformat, sr.timestamp.internal, + sr.meta_timestamp.isoformat, sr.meta_timestamp.internal, + sr.state_text, sr.state_timestamp.isoformat, + sr.state_timestamp.internal, sr.deleted, sr.name)) + + +def print_shard_range_info(node, shard_ranges, indent_level=0): + shard_ranges.sort(key=lambda x: x.deleted) + for sr in shard_ranges: + print_shard_range(node, sr, indent_level) + + +def print_sharding_info(node, broker, indent_level=0): + indent = indent_level * TAB + print('%s(%s) %s' % (indent, node[1][0], broker.get_sharding_sysmeta())) + + +def print_container(name, name2nodes2brokers, expect_type='ROOT', + indent_level=0, used_names=None): + used_names = used_names or set() + indent = indent_level * TAB + node2broker = name2nodes2brokers[name] + ordered_by_index = sorted(node2broker.keys(), key=lambda x: x[1]) + brokers = [(node, node2broker[node]) for node in ordered_by_index] + + print('%sName: %s' % (indent, name)) + if name in used_names: + print('%s (Details already listed)\n' % indent) + return + + used_names.add(name) + print(indent + 'DB files:') + for node, broker in brokers: + print_db(node, broker, expect_type, indent_level=indent_level + 1) + + print(indent + 'Info:') + for node, broker in brokers: + print_broker_info(node, broker, indent_level=indent_level + 1) + + print(indent + 'Sharding info:') + for node, broker in brokers: + print_sharding_info(node, broker, indent_level=indent_level + 1) + print(indent + 'Own shard range:') + for node, broker in brokers: + shard_ranges = broker.get_shard_ranges( + include_deleted=True, include_own=True, exclude_others=True) + print_own_shard_range_info(node, shard_ranges, + indent_level=indent_level + 1) + print(indent + 'Shard ranges:') + shard_names = set() + for node, broker in brokers: + shard_ranges = broker.get_shard_ranges(include_deleted=True) + for sr_name in shard_ranges: + shard_names.add(sr_name.name) + print_shard_range_info(node, shard_ranges, + indent_level=indent_level + 1) + print(indent + 'Shards:') + for sr_name in shard_names: + print_container(sr_name, name2nodes2brokers, expect_type='SHARD', + indent_level=indent_level + 1, used_names=used_names) + print('\n') + + +def 
run(conf_paths): + # container_name -> (node id, node index) -> broker + name2nodes2brokers = defaultdict(dict) + for conf_path in conf_paths: + collect_brokers(conf_path, name2nodes2brokers) + + print('First column on each line is (node index)\n') + for name, node2broker in name2nodes2brokers.items(): + expect_root = False + for node, broker in node2broker.items(): + expect_root = broker.is_root_container() or expect_root + if expect_root: + print_container(name, name2nodes2brokers) + + +if __name__ == '__main__': + conf_dir = '/etc/swift/container-server' + conf_paths = [os.path.join(conf_dir, p) for p in os.listdir(conf_dir) + if p.endswith(('conf', 'conf.d'))] + run(conf_paths) diff --git a/swift/common/db.py b/swift/common/db.py index b05eeb8d11..6425e85034 100644 --- a/swift/common/db.py +++ b/swift/common/db.py @@ -71,6 +71,18 @@ def native_str_keys(metadata): metadata[k.decode('utf-8')] = sv +ZERO_LIKE_VALUES = {None, '', 0, '0'} + + +def zero_like(count): + """ + We've cargo culted our consumers to be tolerant of various expressions of + zero in our databases for backwards compatibility with less disciplined + producers. + """ + return count in ZERO_LIKE_VALUES + + def _db_timeout(timeout, db_file, call): with LockTimeout(timeout, db_file): retry_wait = 0.001 @@ -208,11 +220,27 @@ class DatabaseBroker(object): def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None, account=None, container=None, pending_timeout=None, - stale_reads_ok=False): - """Encapsulates working with a database.""" + stale_reads_ok=False, skip_commits=False): + """Encapsulates working with a database. + + :param db_file: path to a database file. + :param timeout: timeout used for database operations. + :param logger: a logger instance. + :param account: name of account. + :param container: name of container. + :param pending_timeout: timeout used when attempting to take a lock to + write to pending file. + :param stale_reads_ok: if True then no error is raised if pending + commits cannot be committed before the database is read, otherwise + an error is raised. + :param skip_commits: if True then this broker instance will never + commit records from the pending file to the database; + :meth:`~swift.common.db.DatabaseBroker.put_record` should not be + called on brokers with skip_commits True.
+ """ self.conn = None - self.db_file = db_file - self.pending_file = self.db_file + '.pending' + self._db_file = db_file + self.pending_file = self._db_file + '.pending' self.pending_timeout = pending_timeout or 10 self.stale_reads_ok = stale_reads_ok self.db_dir = os.path.dirname(db_file) @@ -221,6 +249,7 @@ class DatabaseBroker(object): self.account = account self.container = container self._db_version = -1 + self.skip_commits = skip_commits def __str__(self): """ @@ -240,9 +269,9 @@ class DatabaseBroker(object): :param put_timestamp: internalized timestamp of initial PUT request :param storage_policy_index: only required for containers """ - if self.db_file == ':memory:': + if self._db_file == ':memory:': tmp_db_file = None - conn = get_db_connection(self.db_file, self.timeout) + conn = get_db_connection(self._db_file, self.timeout) else: mkdirs(self.db_dir) fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir) @@ -329,15 +358,22 @@ class DatabaseBroker(object): self._delete_db(conn, timestamp) conn.commit() + @property + def db_file(self): + return self._db_file + + def get_device_path(self): + suffix_path = os.path.dirname(self.db_dir) + partition_path = os.path.dirname(suffix_path) + dbs_path = os.path.dirname(partition_path) + return os.path.dirname(dbs_path) + def quarantine(self, reason): """ The database will be quarantined and a sqlite3.DatabaseError will be raised indicating the action taken. """ - prefix_path = os.path.dirname(self.db_dir) - partition_path = os.path.dirname(prefix_path) - dbs_path = os.path.dirname(partition_path) - device_path = os.path.dirname(dbs_path) + device_path = self.get_device_path() quar_path = os.path.join(device_path, 'quarantined', self.db_type + 's', os.path.basename(self.db_dir)) @@ -377,6 +413,20 @@ class DatabaseBroker(object): self.quarantine(exc_hint) + @contextmanager + def updated_timeout(self, new_timeout): + """Use with "with" statement; updates ``timeout`` within the block.""" + old_timeout = self.timeout + try: + self.timeout = new_timeout + if self.conn: + self.conn.timeout = new_timeout + yield old_timeout + finally: + self.timeout = old_timeout + if self.conn: + self.conn.timeout = old_timeout + @contextmanager def get(self): """Use with the "with" statement; returns a database connection.""" @@ -477,6 +527,23 @@ class DatabaseBroker(object): with self.get() as conn: return self._is_deleted(conn) + def empty(self): + """ + Check if the broker abstraction contains any undeleted records. + """ + raise NotImplementedError() + + def is_reclaimable(self, now, reclaim_age): + """ + Check if the broker abstraction is empty, and has been marked deleted + for at least a reclaim age. + """ + info = self.get_replication_info() + return (zero_like(info['count']) and + (Timestamp(now - reclaim_age) > + Timestamp(info['delete_timestamp']) > + Timestamp(info['put_timestamp']))) + def merge_timestamps(self, created_at, put_timestamp, delete_timestamp): """ Used in replication to handle updating timestamps. 
@@ -548,13 +615,15 @@ class DatabaseBroker(object): result.append({'remote_id': row[0], 'sync_point': row[1]}) return result - def get_max_row(self): + def get_max_row(self, table=None): + if not table: + table = self.db_contains_type query = ''' SELECT SQLITE_SEQUENCE.seq FROM SQLITE_SEQUENCE WHERE SQLITE_SEQUENCE.name == '%s' LIMIT 1 - ''' % (self.db_contains_type) + ''' % (table, ) with self.get() as conn: row = conn.execute(query).fetchone() return row[0] if row else -1 @@ -582,11 +651,26 @@ class DatabaseBroker(object): return curs.fetchone() def put_record(self, record): - if self.db_file == ':memory:': + """ + Put a record into the DB. If the DB has an associated pending file with + space then the record is appended to that file and a commit to the DB + is deferred. If the DB is in-memory or its pending file is full then + the record will be committed immediately. + + :param record: a record to be added to the DB. + :raises DatabaseConnectionError: if the DB file does not exist or if + ``skip_commits`` is True. + :raises LockTimeout: if a timeout occurs while waiting to take a lock + to write to the pending file. + """ + if self._db_file == ':memory:': self.merge_items([record]) return if not os.path.exists(self.db_file): raise DatabaseConnectionError(self.db_file, "DB doesn't exist") + if self.skip_commits: + raise DatabaseConnectionError(self.db_file, + 'commits not accepted') with lock_parent_directory(self.pending_file, self.pending_timeout): pending_size = 0 try: @@ -606,6 +690,10 @@ class DatabaseBroker(object): protocol=PICKLE_PROTOCOL).encode('base64')) fp.flush() + def _skip_commit_puts(self): + return (self._db_file == ':memory:' or self.skip_commits or not + os.path.exists(self.pending_file)) + def _commit_puts(self, item_list=None): """ Scan for .pending files and commit the found records by feeding them @@ -614,7 +702,13 @@ class DatabaseBroker(object): :param item_list: A list of items to commit in addition to .pending """ - if self.db_file == ':memory:' or not os.path.exists(self.pending_file): + if self._skip_commit_puts(): + if item_list: + # this broker instance should not be used to commit records, + # but if it is then raise an error rather than quietly + # discarding the records in item_list. + raise DatabaseConnectionError(self.db_file, + 'commits not accepted') return if item_list is None: item_list = [] @@ -645,7 +739,7 @@ class DatabaseBroker(object): Catch failures of _commit_puts() if broker is intended for reading of stats, and thus does not care for pending updates. """ - if self.db_file == ':memory:' or not os.path.exists(self.pending_file): + if self._skip_commit_puts(): return try: with lock_parent_directory(self.pending_file, @@ -663,6 +757,12 @@ class DatabaseBroker(object): """ raise NotImplementedError + def merge_items(self, item_list, source=None): + """ + Save :param:item_list to the database. + """ + raise NotImplementedError + def make_tuple_for_pickle(self, record): """ Turn this db record dict into the format this service uses for @@ -701,7 +801,7 @@ class DatabaseBroker(object): within 512k of a boundary, it allocates to the next boundary. Boundaries are 2m, 5m, 10m, 25m, 50m, then every 50m after. 
""" - if not DB_PREALLOCATION or self.db_file == ':memory:': + if not DB_PREALLOCATION or self._db_file == ':memory:': return MB = (1024 * 1024) @@ -830,40 +930,46 @@ class DatabaseBroker(object): def reclaim(self, age_timestamp, sync_timestamp): """ - Delete rows from the db_contains_type table that are marked deleted - and whose created_at timestamp is < age_timestamp. Also deletes rows - from incoming_sync and outgoing_sync where the updated_at timestamp is - < sync_timestamp. + Delete reclaimable rows and metadata from the db. - In addition, this calls the DatabaseBroker's :func:`_reclaim` method. + By default this method will delete rows from the db_contains_type table + that are marked deleted and whose created_at timestamp is < + age_timestamp, and deletes rows from incoming_sync and outgoing_sync + where the updated_at timestamp is < sync_timestamp. In addition, this + calls the :meth:`_reclaim_metadata` method. + + Subclasses may reclaim other items by overriding :meth:`_reclaim`. :param age_timestamp: max created_at timestamp of object rows to delete :param sync_timestamp: max update_at timestamp of sync rows to delete """ - if self.db_file != ':memory:' and os.path.exists(self.pending_file): + if not self._skip_commit_puts(): with lock_parent_directory(self.pending_file, self.pending_timeout): self._commit_puts() with self.get() as conn: - conn.execute(''' - DELETE FROM %s WHERE deleted = 1 AND %s < ? - ''' % (self.db_contains_type, self.db_reclaim_timestamp), - (age_timestamp,)) - try: - conn.execute(''' - DELETE FROM outgoing_sync WHERE updated_at < ? - ''', (sync_timestamp,)) - conn.execute(''' - DELETE FROM incoming_sync WHERE updated_at < ? - ''', (sync_timestamp,)) - except sqlite3.OperationalError as err: - # Old dbs didn't have updated_at in the _sync tables. - if 'no such column: updated_at' not in str(err): - raise - DatabaseBroker._reclaim(self, conn, age_timestamp) + self._reclaim(conn, age_timestamp, sync_timestamp) + self._reclaim_metadata(conn, age_timestamp) conn.commit() - def _reclaim(self, conn, timestamp): + def _reclaim(self, conn, age_timestamp, sync_timestamp): + conn.execute(''' + DELETE FROM %s WHERE deleted = 1 AND %s < ? + ''' % (self.db_contains_type, self.db_reclaim_timestamp), + (age_timestamp,)) + try: + conn.execute(''' + DELETE FROM outgoing_sync WHERE updated_at < ? + ''', (sync_timestamp,)) + conn.execute(''' + DELETE FROM incoming_sync WHERE updated_at < ? + ''', (sync_timestamp,)) + except sqlite3.OperationalError as err: + # Old dbs didn't have updated_at in the _sync tables. + if 'no such column: updated_at' not in str(err): + raise + + def _reclaim_metadata(self, conn, timestamp): """ Removes any empty metadata values older than the timestamp using the given database connection. 
This function will not call commit on the diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index c464341b21..0d063cd455 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -33,10 +33,12 @@ from swift.common.direct_client import quote from swift.common.utils import get_logger, whataremyips, storage_directory, \ renamer, mkdirs, lock_parent_directory, config_true_value, \ unlink_older_than, dump_recon_cache, rsync_module_interpolation, \ - json, Timestamp, parse_override_options, round_robin_iter, Everything + json, parse_override_options, round_robin_iter, Everything, get_db_files, \ + parse_db_filename from swift.common import ring from swift.common.ring.utils import is_local_device -from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE +from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE, \ + is_success from swift.common.bufferedhttp import BufferedHTTPConnection from swift.common.exceptions import DriveNotMounted from swift.common.daemon import Daemon @@ -87,11 +89,14 @@ def roundrobin_datadirs(datadirs): found (in their proper places). The partitions within each data dir are walked randomly, however. - :param datadirs: a list of (path, node_id, partition_filter) to walk - :returns: A generator of (partition, path_to_db_file, node_id) + :param datadirs: a list of tuples of (path, context, partition_filter) to + walk. The context may be any object; the context is not + used by this function but is included with each yielded + tuple. + :returns: A generator of (partition, path_to_db_file, context) """ - def walk_datadir(datadir, node_id, part_filter): + def walk_datadir(datadir, context, part_filter): partitions = [pd for pd in os.listdir(datadir) if looks_like_partition(pd) and part_filter(pd)] random.shuffle(partitions) @@ -116,17 +121,23 @@ def roundrobin_datadirs(datadirs): if not os.path.isdir(hash_dir): continue object_file = os.path.join(hash_dir, hsh + '.db') + # common case if os.path.exists(object_file): - yield (partition, object_file, node_id) - else: - try: - os.rmdir(hash_dir) - except OSError as e: - if e.errno != errno.ENOTEMPTY: - raise + yield (partition, object_file, context) + continue + # look for any alternate db filenames + db_files = get_db_files(object_file) + if db_files: + yield (partition, db_files[-1], context) + continue + try: + os.rmdir(hash_dir) + except OSError as e: + if e.errno != errno.ENOTEMPTY: + raise - its = [walk_datadir(datadir, node_id, filt) - for datadir, node_id, filt in datadirs] + its = [walk_datadir(datadir, context, filt) + for datadir, context, filt in datadirs] rr_its = round_robin_iter(its) for datadir in rr_its: @@ -212,7 +223,7 @@ class Replicator(Daemon): self.stats = {'attempted': 0, 'success': 0, 'failure': 0, 'ts_repl': 0, 'no_change': 0, 'hashmatch': 0, 'rsync': 0, 'diff': 0, 'remove': 0, 'empty': 0, 'remote_merge': 0, - 'start': time.time(), 'diff_capped': 0, + 'start': time.time(), 'diff_capped': 0, 'deferred': 0, 'failure_nodes': {}} def _report_stats(self): @@ -309,9 +320,20 @@ class Replicator(Daemon): different_region=different_region): return False with Timeout(replicate_timeout or self.node_timeout): - response = http.replicate(replicate_method, local_id) + response = http.replicate(replicate_method, local_id, + os.path.basename(broker.db_file)) return response and 200 <= response.status < 300 + def _send_replicate_request(self, http, *repl_args): + with Timeout(self.node_timeout): + response = http.replicate(*repl_args) + if not 
response or not is_success(response.status): + if response: + self.logger.error('ERROR Bad response %s from %s', + response.status, http.host) + return False + return True + def _usync_db(self, point, broker, http, remote_id, local_id): """ Sync a db by sending all records since the last sync. @@ -326,26 +348,29 @@ class Replicator(Daemon): """ self.stats['diff'] += 1 self.logger.increment('diffs') - self.logger.debug('Syncing chunks with %s, starting at %s', - http.host, point) + self.logger.debug('%s usyncing chunks to %s, starting at row %s', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % http.node, + point) + start = time.time() sync_table = broker.get_syncs() objects = broker.get_items_since(point, self.per_diff) diffs = 0 while len(objects) and diffs < self.max_diffs: diffs += 1 - with Timeout(self.node_timeout): - response = http.replicate('merge_items', objects, local_id) - if not response or response.status >= 300 or response.status < 200: - if response: - self.logger.error(_('ERROR Bad response %(status)s from ' - '%(host)s'), - {'status': response.status, - 'host': http.host}) + if not self._send_replicate_request( + http, 'merge_items', objects, local_id): return False # replication relies on db order to send the next merge batch in # order with no gaps point = objects[-1]['ROWID'] objects = broker.get_items_since(point, self.per_diff) + + self.logger.debug('%s usyncing chunks to %s, finished at row %s (%gs)', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % http.node, + point, time.time() - start) + if objects: self.logger.debug( 'Synchronization for %s has fallen more than ' @@ -397,9 +422,8 @@ class Replicator(Daemon): :returns: ReplConnection object """ - return ReplConnection(node, partition, - os.path.basename(db_file).split('.', 1)[0], - self.logger) + hsh, other, ext = parse_db_filename(db_file) + return ReplConnection(node, partition, hsh, self.logger) def _gather_sync_args(self, info): """ @@ -449,32 +473,79 @@ class Replicator(Daemon): if rinfo.get('metadata', ''): broker.update_metadata(json.loads(rinfo['metadata'])) if self._in_sync(rinfo, info, broker, local_sync): + self.logger.debug('%s in sync with %s, nothing to do', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % node) return True - # if the difference in rowids between the two differs by - # more than 50% and the difference is greater than per_diff, - # rsync then do a remote merge. - # NOTE: difference > per_diff stops us from dropping to rsync - # on smaller containers, who have only a few rows to sync. - if rinfo['max_row'] / float(info['max_row']) < 0.5 and \ - info['max_row'] - rinfo['max_row'] > self.per_diff: - self.stats['remote_merge'] += 1 - self.logger.increment('remote_merges') - return self._rsync_db(broker, node, http, info['id'], - replicate_method='rsync_then_merge', - replicate_timeout=(info['count'] / 2000), - different_region=different_region) - # else send diffs over to the remote server - return self._usync_db(max(rinfo['point'], local_sync), - broker, http, rinfo['id'], info['id']) + return self._choose_replication_mode( + node, rinfo, info, local_sync, broker, http, + different_region) + return False + + def _choose_replication_mode(self, node, rinfo, info, local_sync, broker, + http, different_region): + # if the difference in rowids between the two differs by + # more than 50% and the difference is greater than per_diff, + # rsync then do a remote merge. 
+ # NOTE: difference > per_diff stops us from dropping to rsync + # on smaller containers, who have only a few rows to sync. + if (rinfo['max_row'] / float(info['max_row']) < 0.5 and + info['max_row'] - rinfo['max_row'] > self.per_diff): + self.stats['remote_merge'] += 1 + self.logger.increment('remote_merges') + return self._rsync_db(broker, node, http, info['id'], + replicate_method='rsync_then_merge', + replicate_timeout=(info['count'] / 2000), + different_region=different_region) + # else send diffs over to the remote server + return self._usync_db(max(rinfo['point'], local_sync), + broker, http, rinfo['id'], info['id']) def _post_replicate_hook(self, broker, info, responses): """ - :param broker: the container that just replicated + :param broker: broker instance for the database that just replicated :param info: pre-replication full info dict :param responses: a list of bools indicating success from nodes """ pass + def cleanup_post_replicate(self, broker, orig_info, responses): + """ + Cleanup non primary database from disk if needed. + + :param broker: the broker for the database we're replicating + :param orig_info: snapshot of the broker replication info dict taken + before replication + :param responses: a list of boolean success values for each replication + request to other nodes + + :return success: returns False if deletion of the database was + attempted but unsuccessful, otherwise returns True. + """ + log_template = 'Not deleting db %s (%%s)' % broker.db_file + max_row_delta = broker.get_max_row() - orig_info['max_row'] + if max_row_delta < 0: + reason = 'negative max_row_delta: %s' % max_row_delta + self.logger.error(log_template, reason) + return True + if max_row_delta: + reason = '%s new rows' % max_row_delta + self.logger.debug(log_template, reason) + return True + if not (responses and all(responses)): + reason = '%s/%s success' % (responses.count(True), len(responses)) + self.logger.debug(log_template, reason) + return True + # If the db has been successfully synced to all of its peers, it can be + # removed. Callers should have already checked that the db is not on a + # primary node. + if not self.delete_db(broker): + self.logger.debug( + 'Failed to delete db %s', broker.db_file) + return False + self.logger.debug('Successfully deleted db %s', broker.db_file) + return True + def _replicate_object(self, partition, object_file, node_id): """ Replicate the db, choosing method based on whether or not it @@ -483,12 +554,20 @@ class Replicator(Daemon): :param partition: partition to be replicated to :param object_file: DB file name to be replicated :param node_id: node id of the node to be replicated to + :returns: a tuple (success, responses). ``success`` is a boolean that + is True if the method completed successfully, False otherwise. + ``responses`` is a list of booleans each of which indicates the + success or not of replicating to a peer node if replication has + been attempted. ``success`` is False if any of ``responses`` is + False; when ``responses`` is empty, ``success`` may be either True + or False. 
""" start_time = now = time.time() self.logger.debug('Replicating db %s', object_file) self.stats['attempted'] += 1 self.logger.increment('attempts') shouldbehere = True + responses = [] try: broker = self.brokerclass(object_file, pending_timeout=30) broker.reclaim(now - self.reclaim_age, @@ -518,18 +597,12 @@ class Replicator(Daemon): failure_dev['device']) for failure_dev in nodes]) self.logger.increment('failures') - return - # The db is considered deleted if the delete_timestamp value is greater - # than the put_timestamp, and there are no objects. - delete_timestamp = Timestamp(info.get('delete_timestamp') or 0) - put_timestamp = Timestamp(info.get('put_timestamp') or 0) - if (now - self.reclaim_age) > delete_timestamp > put_timestamp and \ - info['count'] in (None, '', 0, '0'): + return False, responses + if broker.is_reclaimable(now, self.reclaim_age): if self.report_up_to_date(info): self.delete_db(broker) self.logger.timing_since('timing', start_time) - return - responses = [] + return True, responses failure_devs_info = set() nodes = self.ring.get_part_nodes(int(partition)) local_dev = None @@ -587,14 +660,11 @@ class Replicator(Daemon): except (Exception, Timeout): self.logger.exception('UNHANDLED EXCEPTION: in post replicate ' 'hook for %s', broker.db_file) - if not shouldbehere and responses and all(responses): - # If the db shouldn't be on this node and has been successfully - # synced to all of its peers, it can be removed. - if not self.delete_db(broker): + if not shouldbehere: + if not self.cleanup_post_replicate(broker, info, responses): failure_devs_info.update( [(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in repl_nodes]) - target_devs_info = set([(target_dev['replication_ip'], target_dev['device']) for target_dev in repl_nodes]) @@ -602,6 +672,9 @@ class Replicator(Daemon): self._add_failure_stats(failure_devs_info) self.logger.timing_since('timing', start_time) + if shouldbehere: + responses.append(True) + return all(responses), responses def delete_db(self, broker): object_file = broker.db_file @@ -746,6 +819,9 @@ class ReplicatorRpc(object): self.mount_check = mount_check self.logger = logger or get_logger({}, log_route='replicator-rpc') + def _db_file_exists(self, db_path): + return os.path.exists(db_path) + def dispatch(self, replicate_args, args): if not hasattr(args, 'pop'): return HTTPBadRequest(body='Invalid object type') @@ -764,7 +840,7 @@ class ReplicatorRpc(object): # someone might be about to rsync a db to us, # make sure there's a tmp dir to receive it. 
mkdirs(os.path.join(self.root, drive, 'tmp')) - if not os.path.exists(db_file): + if not self._db_file_exists(db_file): return HTTPNotFound() return getattr(self, op)(self.broker_class(db_file), args) @@ -863,6 +939,8 @@ class ReplicatorRpc(object): def complete_rsync(self, drive, db_file, args): old_filename = os.path.join(self.root, drive, 'tmp', args[0]) + if args[1:]: + db_file = os.path.join(os.path.dirname(db_file), args[1]) if os.path.exists(db_file): return HTTPNotFound() if not os.path.exists(old_filename): @@ -872,12 +950,21 @@ class ReplicatorRpc(object): renamer(old_filename, db_file) return HTTPNoContent() + def _abort_rsync_then_merge(self, db_file, tmp_filename): + return not (self._db_file_exists(db_file) and + os.path.exists(tmp_filename)) + + def _post_rsync_then_merge_hook(self, existing_broker, new_broker): + # subclasses may override to make custom changes to the new broker + pass + def rsync_then_merge(self, drive, db_file, args): - old_filename = os.path.join(self.root, drive, 'tmp', args[0]) - if not os.path.exists(db_file) or not os.path.exists(old_filename): + tmp_filename = os.path.join(self.root, drive, 'tmp', args[0]) + if self._abort_rsync_then_merge(db_file, tmp_filename): return HTTPNotFound() - new_broker = self.broker_class(old_filename) + new_broker = self.broker_class(tmp_filename) existing_broker = self.broker_class(db_file) + db_file = existing_broker.db_file point = -1 objects = existing_broker.get_items_since(point, 1000) while len(objects): @@ -885,9 +972,13 @@ class ReplicatorRpc(object): point = objects[-1]['ROWID'] objects = existing_broker.get_items_since(point, 1000) sleep() + new_broker.merge_syncs(existing_broker.get_syncs()) + self._post_rsync_then_merge_hook(existing_broker, new_broker) new_broker.newid(args[0]) new_broker.update_metadata(existing_broker.metadata) - renamer(old_filename, db_file) + if self._abort_rsync_then_merge(db_file, tmp_filename): + return HTTPNotFound() + renamer(tmp_filename, db_file) return HTTPNoContent() # Footnote [1]: diff --git a/swift/common/direct_client.py b/swift/common/direct_client.py index fad4440f64..9f112afa95 100644 --- a/swift/common/direct_client.py +++ b/swift/common/direct_client.py @@ -54,22 +54,72 @@ class DirectClientException(ClientException): http_reason=resp.reason, http_headers=headers) -def _make_req(node, part, method, path, _headers, stype, - conn_timeout=5, response_timeout=15): +def _make_req(node, part, method, path, headers, stype, + conn_timeout=5, response_timeout=15, send_timeout=15, + contents=None, content_length=None, chunk_size=65535): """ Make request to backend storage node. (i.e. 'Account', 'Container', 'Object') :param node: a node dict from a ring - :param part: an integer, the partion number + :param part: an integer, the partition number :param method: a string, the HTTP method (e.g. 
'PUT', 'DELETE', etc) :param path: a string, the request path :param headers: a dict, header name => value :param stype: a string, describing the type of service + :param conn_timeout: timeout while waiting for connection; default is 5 + seconds + :param response_timeout: timeout while waiting for response; default is 15 + seconds + :param send_timeout: timeout for sending request body; default is 15 + seconds + :param contents: an iterable or string to read object data from + :param content_length: value to send as content-length header + :param chunk_size: if defined, chunk size of data to send :returns: an HTTPResponse object + :raises DirectClientException: if the response status is not 2xx + :raises eventlet.Timeout: if either conn_timeout or response_timeout is + exceeded """ + if contents is not None: + if content_length is not None: + headers['Content-Length'] = str(content_length) + else: + for n, v in headers.items(): + if n.lower() == 'content-length': + content_length = int(v) + if not contents: + headers['Content-Length'] = '0' + if isinstance(contents, six.string_types): + contents = [contents] + if content_length is None: + headers['Transfer-Encoding'] = 'chunked' + with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, - method, path, headers=_headers) + method, path, headers=headers) + + if contents is not None: + contents_f = FileLikeIter(contents) + + with Timeout(send_timeout): + if content_length is None: + chunk = contents_f.read(chunk_size) + while chunk: + conn.send('%x\r\n%s\r\n' % (len(chunk), chunk)) + chunk = contents_f.read(chunk_size) + conn.send('0\r\n\r\n') + else: + left = content_length + while left > 0: + size = chunk_size + if size > left: + size = left + chunk = contents_f.read(size) + if not chunk: + break + conn.send(chunk) + left -= len(chunk) + with Timeout(response_timeout): resp = conn.getresponse() resp.read() @@ -82,7 +132,7 @@ def _get_direct_account_container(path, stype, node, part, marker=None, limit=None, prefix=None, delimiter=None, conn_timeout=5, response_timeout=15, - end_marker=None, reverse=None): + end_marker=None, reverse=None, headers=None): """Base class for get direct account and container. Do not use directly use the get_direct_account or @@ -105,7 +155,7 @@ def _get_direct_account_container(path, stype, node, part, with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'GET', path, query_string=qs, - headers=gen_headers()) + headers=gen_headers(hdrs_in=headers)) with Timeout(response_timeout): resp = conn.getresponse() if not is_success(resp.status): @@ -121,11 +171,12 @@ def _get_direct_account_container(path, stype, node, part, return resp_headers, json.loads(resp.read()) -def gen_headers(hdrs_in=None, add_ts=False): +def gen_headers(hdrs_in=None, add_ts=False, add_user_agent=True): hdrs_out = HeaderKeyDict(hdrs_in) if hdrs_in else HeaderKeyDict() if add_ts: hdrs_out['X-Timestamp'] = Timestamp.now().internal - hdrs_out['User-Agent'] = 'direct-client %s' % os.getpid() + if add_user_agent: + hdrs_out['User-Agent'] = 'direct-client %s' % os.getpid() return hdrs_out @@ -197,7 +248,7 @@ def direct_head_container(node, part, account, container, conn_timeout=5, def direct_get_container(node, part, account, container, marker=None, limit=None, prefix=None, delimiter=None, conn_timeout=5, response_timeout=15, end_marker=None, - reverse=None): + reverse=None, headers=None): """ Get container listings directly from the container server. 
@@ -213,6 +264,7 @@ def direct_get_container(node, part, account, container, marker=None, :param response_timeout: timeout in seconds for getting the response :param end_marker: end_marker query :param reverse: reverse the returned listing + :param headers: headers to be included in the request :returns: a tuple of (response headers, a list of objects) The response headers will be a HeaderKeyDict. """ @@ -224,7 +276,8 @@ def direct_get_container(node, part, account, container, marker=None, end_marker=end_marker, reverse=reverse, conn_timeout=conn_timeout, - response_timeout=response_timeout) + response_timeout=response_timeout, + headers=headers) def direct_delete_container(node, part, account, container, conn_timeout=5, @@ -250,6 +303,37 @@ def direct_delete_container(node, part, account, container, conn_timeout=5, 'Container', conn_timeout, response_timeout) +def direct_put_container(node, part, account, container, conn_timeout=5, + response_timeout=15, headers=None, contents=None, + content_length=None, chunk_size=65535): + """ + Make a PUT request to a container server. + + :param node: node dictionary from the ring + :param part: partition the container is on + :param account: account name + :param container: container name + :param conn_timeout: timeout in seconds for establishing the connection + :param response_timeout: timeout in seconds for getting the response + :param headers: additional headers to include in the request + :param contents: an iterable or string to send in request body (optional) + :param content_length: value to send as content-length header (optional) + :param chunk_size: chunk size of data to send (optional) + :raises ClientException: HTTP PUT request failed + """ + if headers is None: + headers = {} + + lower_headers = set(k.lower() for k in headers) + headers_out = gen_headers(headers, + add_ts='x-timestamp' not in lower_headers, + add_user_agent='user-agent' not in lower_headers) + path = '/%s/%s' % (account, container) + _make_req(node, part, 'PUT', path, headers_out, 'Container', conn_timeout, + response_timeout, contents=contents, + content_length=content_length, chunk_size=chunk_size) + + def direct_put_container_object(node, part, account, container, obj, conn_timeout=5, response_timeout=15, headers=None): @@ -385,56 +469,18 @@ def direct_put_object(node, part, account, container, name, contents, headers = {} if etag: headers['ETag'] = etag.strip('"') - if content_length is not None: - headers['Content-Length'] = str(content_length) - else: - for n, v in headers.items(): - if n.lower() == 'content-length': - content_length = int(v) if content_type is not None: headers['Content-Type'] = content_type else: headers['Content-Type'] = 'application/octet-stream' - if not contents: - headers['Content-Length'] = '0' - if isinstance(contents, six.string_types): - contents = [contents] # Incase the caller want to insert an object with specific age add_ts = 'X-Timestamp' not in headers - if content_length is None: - headers['Transfer-Encoding'] = 'chunked' + resp = _make_req( + node, part, 'PUT', path, gen_headers(headers, add_ts=add_ts), + 'Object', conn_timeout, response_timeout, contents=contents, + content_length=content_length, chunk_size=chunk_size) - with Timeout(conn_timeout): - conn = http_connect(node['ip'], node['port'], node['device'], part, - 'PUT', path, headers=gen_headers(headers, add_ts)) - - contents_f = FileLikeIter(contents) - - if content_length is None: - chunk = contents_f.read(chunk_size) - while chunk: - conn.send('%x\r\n%s\r\n' % 
(len(chunk), chunk)) - chunk = contents_f.read(chunk_size) - conn.send('0\r\n\r\n') - else: - left = content_length - while left > 0: - size = chunk_size - if size > left: - size = left - chunk = contents_f.read(size) - if not chunk: - break - conn.send(chunk) - left -= len(chunk) - - with Timeout(response_timeout): - resp = conn.getresponse() - resp.read() - if not is_success(resp.status): - raise DirectClientException('Object', 'PUT', - node, part, path, resp) return resp.getheader('etag').strip('"') diff --git a/swift/common/manager.py b/swift/common/manager.py index 330f8310f4..71f9e689b3 100644 --- a/swift/common/manager.py +++ b/swift/common/manager.py @@ -34,7 +34,7 @@ PROC_DIR = '/proc' ALL_SERVERS = ['account-auditor', 'account-server', 'container-auditor', 'container-replicator', 'container-reconciler', - 'container-server', 'container-sync', + 'container-server', 'container-sharder', 'container-sync', 'container-updater', 'object-auditor', 'object-server', 'object-expirer', 'object-replicator', 'object-reconstructor', 'object-updater', @@ -637,13 +637,16 @@ class Server(object): {'server': self.server, 'pid': pid, 'conf': conf_file}) return 0 - def spawn(self, conf_file, once=False, wait=True, daemon=True, **kwargs): + def spawn(self, conf_file, once=False, wait=True, daemon=True, + additional_args=None, **kwargs): """Launch a subprocess for this server. :param conf_file: path to conf_file to use as first arg :param once: boolean, add once argument to command :param wait: boolean, if true capture stdout with a pipe :param daemon: boolean, if false ask server to log to console + :param additional_args: list of additional arguments to pass + on the command line :returns: the pid of the spawned process """ @@ -653,6 +656,10 @@ class Server(object): if not daemon: # ask the server to log to console args.append('verbose') + if additional_args: + if isinstance(additional_args, str): + additional_args = [additional_args] + args.extend(additional_args) # figure out what we're going to do with stdio if not daemon: diff --git a/swift/common/utils.py b/swift/common/utils.py index 54efdf2b18..048e64d65d 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -19,10 +19,12 @@ from __future__ import print_function import base64 import binascii +import bisect import collections import errno import fcntl import grp +import hashlib import hmac import json import math @@ -76,6 +78,7 @@ from six.moves import range, http_client from six.moves.urllib.parse import ParseResult from six.moves.urllib.parse import quote as _quote from six.moves.urllib.parse import urlparse as stdlib_urlparse +from six import string_types from swift import gettext_ as _ import swift.common.exceptions @@ -409,6 +412,21 @@ def config_positive_int_value(value): return result +def config_float_value(value, minimum=None, maximum=None): + try: + val = float(value) + if minimum is not None and val < minimum: + raise ValueError() + if maximum is not None and val > maximum: + raise ValueError() + return val + except (TypeError, ValueError): + min_ = ', greater than %s' % minimum if minimum is not None else '' + max_ = ', less than %s' % maximum if maximum is not None else '' + raise ValueError('Config option must be a number%s%s, not "%s".' % + (min_, max_, value)) + + def config_auto_int_value(value, default): """ Returns default if value is None or 'auto'. 
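Note: direct_put_object() above now delegates its body-sending to _make_req(), and the same path is used by the new direct_put_container(). A minimal usage sketch of direct_put_container(), assuming a standard container ring lookup (the ring location, account/container names and header are hypothetical, chosen only for illustration):

    from swift.common.direct_client import direct_put_container
    from swift.common.ring import Ring

    container_ring = Ring('/etc/swift', ring_name='container')
    part, nodes = container_ring.get_nodes('.shards_AUTH_test', 'shard-0')
    for node in nodes:
        # gen_headers() fills in X-Timestamp and User-Agent unless the
        # caller supplies them, per the hunk above
        direct_put_container(node, part, '.shards_AUTH_test', 'shard-0',
                             headers={'X-Backend-Storage-Policy-Index': '0'})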
@@ -4370,6 +4388,553 @@ def get_md5_socket(): return md5_sockfd + +class ShardRange(object): + """ + A ShardRange encapsulates sharding state related to a container including + lower and upper bounds that define the object namespace for which the + container is responsible. + + Shard ranges may be persisted in a container database. Timestamps + associated with subsets of the shard range attributes are used to resolve + conflicts when a shard range needs to be merged with an existing shard + range record and the most recent version of an attribute should be + persisted. + + :param name: the name of the shard range; this should take the form of a + path to a container i.e. <account>/<container>. + :param timestamp: a timestamp that represents the time at which the + shard range's ``lower``, ``upper`` or ``deleted`` attributes were + last modified. + :param lower: the lower bound of object names contained in the shard range; + the lower bound *is not* included in the shard range namespace. + :param upper: the upper bound of object names contained in the shard range; + the upper bound *is* included in the shard range namespace. + :param object_count: the number of objects in the shard range; defaults to + zero. + :param bytes_used: the number of bytes in the shard range; defaults to + zero. + :param meta_timestamp: a timestamp that represents the time at which the + shard range's ``object_count`` and ``bytes_used`` were last updated; + defaults to the value of ``timestamp``. + :param deleted: a boolean; if True the shard range is considered to be + deleted. + :param state: the state; must be one of ShardRange.STATES; defaults to + FOUND. + :param state_timestamp: a timestamp that represents the time at which + ``state`` was forced to its current value; defaults to the value of + ``timestamp``. This timestamp is typically not updated with every + change of ``state`` because in general conflicts in ``state`` + attributes are resolved by choosing the larger ``state`` value. + However, when this rule does not apply, for example when changing state + from ``SHARDED`` to ``ACTIVE``, the ``state_timestamp`` may be advanced + so that the new ``state`` value is preferred over any older ``state`` + value. + :param epoch: optional epoch timestamp which represents the time at which + sharding was enabled for a container.
+ """ + FOUND = 10 + CREATED = 20 + CLEAVED = 30 + ACTIVE = 40 + SHRINKING = 50 + SHARDING = 60 + SHARDED = 70 + STATES = {FOUND: 'found', + CREATED: 'created', + CLEAVED: 'cleaved', + ACTIVE: 'active', + SHRINKING: 'shrinking', + SHARDING: 'sharding', + SHARDED: 'sharded'} + STATES_BY_NAME = dict((v, k) for k, v in STATES.items()) + + class OuterBound(object): + def __eq__(self, other): + return isinstance(other, type(self)) + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return '' + + def __repr__(self): + return type(self).__name__ + + def __bool__(self): + return False + + __nonzero__ = __bool__ + + @functools.total_ordering + class MaxBound(OuterBound): + def __ge__(self, other): + return True + + @functools.total_ordering + class MinBound(OuterBound): + def __le__(self, other): + return True + + MIN = MinBound() + MAX = MaxBound() + + def __init__(self, name, timestamp, lower=MIN, upper=MAX, + object_count=0, bytes_used=0, meta_timestamp=None, + deleted=False, state=None, state_timestamp=None, epoch=None): + self.account = self.container = self._timestamp = \ + self._meta_timestamp = self._state_timestamp = self._epoch = None + self._lower = ShardRange.MIN + self._upper = ShardRange.MAX + self._deleted = False + self._state = None + + self.name = name + self.timestamp = timestamp + self.lower = lower + self.upper = upper + self.deleted = deleted + self.object_count = object_count + self.bytes_used = bytes_used + self.meta_timestamp = meta_timestamp + self.state = self.FOUND if state is None else state + self.state_timestamp = state_timestamp + self.epoch = epoch + + @classmethod + def _encode(cls, value): + if six.PY2 and isinstance(value, six.text_type): + return value.encode('utf-8') + return value + + def _encode_bound(self, bound): + if isinstance(bound, ShardRange.OuterBound): + return bound + if not isinstance(bound, string_types): + raise TypeError('must be a string type') + return self._encode(bound) + + @classmethod + def _make_container_name(cls, root_container, parent_container, timestamp, + index): + if not isinstance(parent_container, bytes): + parent_container = parent_container.encode('utf-8') + return "%s-%s-%s-%s" % (root_container, + hashlib.md5(parent_container).hexdigest(), + cls._to_timestamp(timestamp).internal, + index) + + @classmethod + def make_path(cls, shards_account, root_container, parent_container, + timestamp, index): + """ + Returns a path for a shard container that is valid to use as a name + when constructing a :class:`~swift.common.utils.ShardRange`. + + :param shards_account: the hidden internal account to which the shard + container belongs. + :param root_container: the name of the root container for the shard. + :param parent_container: the name of the parent container for the + shard; for initial first generation shards this should be the same + as ``root_container``; for shards of shards this should be the name + of the sharding shard container. + :param timestamp: an instance of :class:`~swift.common.utils.Timestamp` + :param index: a unique index that will distinguish the path from any + other path generated using the same combination of + ``shards_account``, ``root_container``, ``parent_container`` and + ``timestamp``. 
+ :return: a string of the form <shards_account>/<shard_container> + """ + shard_container = cls._make_container_name( + root_container, parent_container, timestamp, index) + return '%s/%s' % (shards_account, shard_container) + + @classmethod + def _to_timestamp(cls, timestamp): + if timestamp is None or isinstance(timestamp, Timestamp): + return timestamp + return Timestamp(timestamp) + + @property + def name(self): + return '%s/%s' % (self.account, self.container) + + @name.setter + def name(self, path): + path = self._encode(path) + if not path or len(path.split('/')) != 2 or not all(path.split('/')): + raise ValueError( + "Name must be of the form '<account>/<container>', got %r" % + path) + self.account, self.container = path.split('/') + + @property + def timestamp(self): + return self._timestamp + + @timestamp.setter + def timestamp(self, ts): + if ts is None: + raise TypeError('timestamp cannot be None') + self._timestamp = self._to_timestamp(ts) + + @property + def meta_timestamp(self): + if self._meta_timestamp is None: + return self.timestamp + return self._meta_timestamp + + @meta_timestamp.setter + def meta_timestamp(self, ts): + self._meta_timestamp = self._to_timestamp(ts) + + @property + def lower(self): + return self._lower + + @property + def lower_str(self): + return str(self.lower) + + @lower.setter + def lower(self, value): + if value in (None, ''): + value = ShardRange.MIN + try: + value = self._encode_bound(value) + except TypeError as err: + raise TypeError('lower %s' % err) + if value > self._upper: + raise ValueError( + 'lower (%r) must be less than or equal to upper (%r)' % + (value, self.upper)) + self._lower = value + + @property + def end_marker(self): + return self.upper_str + '\x00' if self.upper else '' + + @property + def upper(self): + return self._upper + + @property + def upper_str(self): + return str(self.upper) + + @upper.setter + def upper(self, value): + if value in (None, ''): + value = ShardRange.MAX + try: + value = self._encode_bound(value) + except TypeError as err: + raise TypeError('upper %s' % err) + if value < self._lower: + raise ValueError( + 'upper (%r) must be greater than or equal to lower (%r)' % + (value, self.lower)) + self._upper = value + + @property + def object_count(self): + return self._count + + @object_count.setter + def object_count(self, count): + count = int(count) + if count < 0: + raise ValueError('object_count cannot be < 0') + self._count = count + + @property + def bytes_used(self): + return self._bytes + + @bytes_used.setter + def bytes_used(self, bytes_used): + bytes_used = int(bytes_used) + if bytes_used < 0: + raise ValueError('bytes_used cannot be < 0') + self._bytes = bytes_used + + def update_meta(self, object_count, bytes_used, meta_timestamp=None): + """ + Set the object stats metadata to the given values and update the + meta_timestamp to the current time. + + :param object_count: should be an integer + :param bytes_used: should be an integer + :param meta_timestamp: timestamp for metadata; if not given the + current time will be set. + :raises ValueError: if ``object_count`` or ``bytes_used`` cannot be + cast to an int, or if meta_timestamp is neither None nor can be + cast to a :class:`~swift.common.utils.Timestamp`.
+ """ + self.object_count = int(object_count) + self.bytes_used = int(bytes_used) + if meta_timestamp is None: + self.meta_timestamp = Timestamp.now() + else: + self.meta_timestamp = meta_timestamp + + def increment_meta(self, object_count, bytes_used): + """ + Increment the object stats metadata by the given values and update the + meta_timestamp to the current time. + + :param object_count: should be an integer + :param bytes_used: should be an integer + :raises ValueError: if ``object_count`` or ``bytes_used`` cannot be + cast to an int. + """ + self.update_meta(self.object_count + int(object_count), + self.bytes_used + int(bytes_used)) + + @classmethod + def resolve_state(cls, state): + """ + Given a value that may be either the name or the number of a state + return a tuple of (state number, state name). + + :param state: Either a string state name or an integer state number. + :return: A tuple (state number, state name) + :raises ValueError: if ``state`` is neither a valid state name nor a + valid state number. + """ + try: + state = state.lower() + state_num = cls.STATES_BY_NAME[state] + except (KeyError, AttributeError): + try: + state_name = cls.STATES[state] + except KeyError: + raise ValueError('Invalid state %r' % state) + else: + state_num = state + else: + state_name = state + return state_num, state_name + + @property + def state(self): + return self._state + + @state.setter + def state(self, state): + try: + float_state = float(state) + int_state = int(float_state) + except (ValueError, TypeError): + raise ValueError('Invalid state %r' % state) + if int_state != float_state or int_state not in self.STATES: + raise ValueError('Invalid state %r' % state) + self._state = int_state + + @property + def state_text(self): + return self.STATES[self.state] + + @property + def state_timestamp(self): + if self._state_timestamp is None: + return self.timestamp + return self._state_timestamp + + @state_timestamp.setter + def state_timestamp(self, ts): + self._state_timestamp = self._to_timestamp(ts) + + @property + def epoch(self): + return self._epoch + + @epoch.setter + def epoch(self, epoch): + self._epoch = self._to_timestamp(epoch) + + def update_state(self, state, state_timestamp=None): + """ + Set state to the given value and optionally update the state_timestamp + to the given time. + + :param state: new state, should be an integer + :param state_timestamp: timestamp for state; if not given the + state_timestamp will not be changed. + :return: True if the state or state_timestamp was changed, False + otherwise + """ + if state_timestamp is None and self.state == state: + return False + self.state = state + if state_timestamp is not None: + self.state_timestamp = state_timestamp + return True + + @property + def deleted(self): + return self._deleted + + @deleted.setter + def deleted(self, value): + self._deleted = bool(value) + + def set_deleted(self, timestamp=None): + """ + Mark the shard range deleted and set timestamp to the current time. + + :param timestamp: optional timestamp to set; if not given the + current time will be set. 
+ :return: True if the deleted attribute or timestamp was changed, False + otherwise + """ + if timestamp is None and self.deleted: + return False + self.deleted = True + self.timestamp = timestamp or Timestamp.now() + return True + + def __contains__(self, item): + # test if the given item is within the namespace + if item == '': + return False + item = self._encode_bound(item) + return self.lower < item <= self.upper + + def __lt__(self, other): + # a ShardRange is less than other if its entire namespace is less than + # other; if other is another ShardRange that implies that this + # ShardRange's upper must be less than or equal to the other + # ShardRange's lower + if self.upper == ShardRange.MAX: + return False + if isinstance(other, ShardRange): + return self.upper <= other.lower + elif other is None: + return True + else: + return self.upper < other + + def __gt__(self, other): + # a ShardRange is greater than other if its entire namespace is greater + # than other; if other is another ShardRange that implies that this + # ShardRange's lower must be less greater than or equal to the other + # ShardRange's upper + if self.lower == ShardRange.MIN: + return False + if isinstance(other, ShardRange): + return self.lower >= other.upper + elif other is None: + return False + else: + return self.lower >= other + + def __eq__(self, other): + # test for equality of range bounds only + if not isinstance(other, ShardRange): + return False + return self.lower == other.lower and self.upper == other.upper + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return '%s<%r to %r as of %s, (%d, %d) as of %s, %s as of %s>' % ( + self.__class__.__name__, self.lower, self.upper, + self.timestamp.internal, self.object_count, self.bytes_used, + self.meta_timestamp.internal, self.state_text, + self.state_timestamp.internal) + + def entire_namespace(self): + """ + Returns True if the ShardRange includes the entire namespace, False + otherwise. + """ + return (self.lower == ShardRange.MIN and + self.upper == ShardRange.MAX) + + def overlaps(self, other): + """ + Returns True if the ShardRange namespace overlaps with the other + ShardRange's namespace. + + :param other: an instance of :class:`~swift.common.utils.ShardRange` + """ + if not isinstance(other, ShardRange): + return False + return max(self.lower, other.lower) < min(self.upper, other.upper) + + def includes(self, other): + """ + Returns True if this namespace includes the whole of the other + namespace, False otherwise. + + :param other: an instance of :class:`~swift.common.utils.ShardRange` + """ + return (self.lower <= other.lower) and (other.upper <= self.upper) + + def __iter__(self): + yield 'name', self.name + yield 'timestamp', self.timestamp.internal + yield 'lower', str(self.lower) + yield 'upper', str(self.upper) + yield 'object_count', self.object_count + yield 'bytes_used', self.bytes_used + yield 'meta_timestamp', self.meta_timestamp.internal + yield 'deleted', 1 if self.deleted else 0 + yield 'state', self.state + yield 'state_timestamp', self.state_timestamp.internal + yield 'epoch', self.epoch.internal if self.epoch is not None else None + + def copy(self, timestamp=None, **kwargs): + """ + Creates a copy of the ShardRange. + + :param timestamp: (optional) If given, the returned ShardRange will + have all of its timestamps set to this value. Otherwise the + returned ShardRange will have the original timestamps. 
+ :return: an instance of :class:`~swift.common.utils.ShardRange` + """ + new = ShardRange.from_dict(dict(self, **kwargs)) + if timestamp: + new.timestamp = timestamp + new.meta_timestamp = new.state_timestamp = None + return new + + @classmethod + def from_dict(cls, params): + """ + Return an instance constructed using the given dict of params. This + method is deliberately less flexible than the class `__init__()` method + and requires all of the `__init__()` args to be given in the dict of + params. + + :param params: a dict of parameters + :return: an instance of this class + """ + return cls( + params['name'], params['timestamp'], params['lower'], + params['upper'], params['object_count'], params['bytes_used'], + params['meta_timestamp'], params['deleted'], params['state'], + params['state_timestamp'], params['epoch']) + + +def find_shard_range(item, ranges): + """ + Find a ShardRange in the given list of ``ranges`` whose namespace + contains ``item``. + + :param item: The item for which a ShardRange is to be found. + :param ranges: a sorted list of ShardRanges. + :return: the ShardRange whose namespace contains ``item``, or None if + no suitable range is found. + """ + index = bisect.bisect_left(ranges, item) + if index != len(ranges) and item in ranges[index]: + return ranges[index] + return None + + def modify_priority(conf, logger): """ Modify priority by nice and ionice. @@ -4750,3 +5315,110 @@ def distribute_evenly(items, num_buckets): for index, item in enumerate(items): out[index % num_buckets].append(item) return out + + +def get_redirect_data(response): + """ + Extract a redirect location from a response's headers. + + :param response: a response + :return: a tuple of (path, Timestamp) if a Location header is found, + otherwise None + :raises ValueError: if the Location header is found but an + X-Backend-Redirect-Timestamp is not found, or if there is a problem + with the format of either header + """ + headers = HeaderKeyDict(response.getheaders()) + if 'Location' not in headers: + return None + location = urlparse(headers['Location']).path + account, container, _junk = split_path(location, 2, 3, True) + timestamp_val = headers.get('X-Backend-Redirect-Timestamp') + try: + timestamp = Timestamp(timestamp_val) + except (TypeError, ValueError): + raise ValueError('Invalid timestamp value: %s' % timestamp_val) + return '%s/%s' % (account, container), timestamp + + +def parse_db_filename(filename): + """ + Splits a db filename into three parts: the hash, the epoch, and the + extension. + + >>> parse_db_filename("ab2134.db") + ('ab2134', None, '.db') + >>> parse_db_filename("ab2134_1234567890.12345.db") + ('ab2134', '1234567890.12345', '.db') + + :param filename: A db file basename or path to a db file. + :return: A tuple of (hash, epoch, extension). ``epoch`` may be None. + :raises ValueError: if ``filename`` is not a path to a file. + """ + filename = os.path.basename(filename) + if not filename: + raise ValueError('Path to a file required.') + name, ext = os.path.splitext(filename) + parts = name.split('_') + hash_ = parts.pop(0) + epoch = parts[0] if parts else None + return hash_, epoch, ext + + +def make_db_file_path(db_path, epoch): + """ + Given a path to a db file, return a modified path whose filename part has + the given epoch. + + A db filename takes the form <hash>[_<epoch>].db; this method replaces the + <epoch> part of the given ``db_path`` with the given ``epoch`` value. + + :param db_path: Path to a db file that does not necessarily exist.
+ :param epoch: A string that will be used as the epoch in the new path's + filename; the value will be normalized to the normal string + representation of a :class:`~swift.common.utils.Timestamp`. + :return: A modified path to a db file. + :raises ValueError: if the ``epoch`` is not valid for constructing a + :class:`~swift.common.utils.Timestamp`. + """ + if epoch is None: + raise ValueError('epoch must not be None') + epoch = Timestamp(epoch).normal + hash_, _, ext = parse_db_filename(db_path) + db_dir = os.path.dirname(db_path) + return os.path.join(db_dir, '%s_%s%s' % (hash_, epoch, ext)) + + +def get_db_files(db_path): + """ + Given the path to a db file, return a sorted list of all valid db files + that actually exist in that path's dir. A valid db filename has the form: + + [_].db + + where matches the part of the given db_path as would be + parsed by :meth:`~swift.utils.common.parse_db_filename`. + + :param db_path: Path to a db file that does not necessarily exist. + :return: List of valid db files that do exist in the dir of the + ``db_path``. This list may be empty. + """ + db_dir, db_file = os.path.split(db_path) + try: + files = os.listdir(db_dir) + except OSError as err: + if err.errno == errno.ENOENT: + return [] + raise + if not files: + return [] + match_hash, epoch, ext = parse_db_filename(db_file) + results = [] + for f in files: + hash_, epoch, ext = parse_db_filename(f) + if ext != '.db': + continue + if hash_ != match_hash: + continue + results.append(os.path.join(db_dir, f)) + return sorted(results) diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py index 752e8767aa..2a9409d92e 100644 --- a/swift/common/wsgi.py +++ b/swift/common/wsgi.py @@ -45,6 +45,9 @@ from swift.common.utils import capture_stdio, disable_fallocate, \ validate_configuration, get_hub, config_auto_int_value, \ reiterate +SIGNUM_TO_NAME = {getattr(signal, n): n for n in dir(signal) + if n.startswith('SIG') and '_' not in n} + # Set maximum line size of message headers to be accepted. wsgi.MAX_HEADER_LINE = constraints.MAX_HEADER_SIZE @@ -559,7 +562,8 @@ class WorkersStrategy(object): :param int pid: The new worker process' PID """ - self.logger.notice('Started child %s' % pid) + self.logger.notice('Started child %s from parent %s', + pid, os.getpid()) self.children.append(pid) def register_worker_exit(self, pid): @@ -569,7 +573,8 @@ class WorkersStrategy(object): :param int pid: The PID of the worker that exited. 
""" - self.logger.error('Removing dead child %s' % pid) + self.logger.error('Removing dead child %s from parent %s', + pid, os.getpid()) self.children.remove(pid) def shutdown_sockets(self): @@ -935,24 +940,17 @@ def run_wsgi(conf_path, app_section, *args, **kwargs): run_server(conf, logger, no_fork_sock, global_conf=global_conf) return 0 - def kill_children(*args): - """Kills the entire process group.""" - logger.error('SIGTERM received') - signal.signal(signal.SIGTERM, signal.SIG_IGN) - running[0] = False - os.killpg(0, signal.SIGTERM) + def stop_with_signal(signum, *args): + """Set running flag to False and capture the signum""" + running_context[0] = False + running_context[1] = signum - def hup(*args): - """Shuts down the server, but allows running requests to complete""" - logger.error('SIGHUP received') - signal.signal(signal.SIGHUP, signal.SIG_IGN) - running[0] = False + # context to hold boolean running state and stop signum + running_context = [True, None] + signal.signal(signal.SIGTERM, stop_with_signal) + signal.signal(signal.SIGHUP, stop_with_signal) - running = [True] - signal.signal(signal.SIGTERM, kill_children) - signal.signal(signal.SIGHUP, hup) - - while running[0]: + while running_context[0]: for sock, sock_info in strategy.new_worker_socks(): pid = os.fork() if pid == 0: @@ -992,11 +990,23 @@ def run_wsgi(conf_path, app_section, *args, **kwargs): sleep(0.01) except KeyboardInterrupt: logger.notice('User quit') - running[0] = False + running_context[0] = False break + if running_context[1] is not None: + try: + signame = SIGNUM_TO_NAME[running_context[1]] + except KeyError: + logger.error('Stopping with unexpected signal %r' % + running_context[1]) + else: + logger.error('%s received', signame) + if running_context[1] == signal.SIGTERM: + os.killpg(0, signal.SIGTERM) + strategy.shutdown_sockets() - logger.notice('Exited') + signal.signal(signal.SIGTERM, signal.SIG_IGN) + logger.notice('Exited (%s)', os.getpid()) return 0 diff --git a/swift/container/backend.py b/swift/container/backend.py index bab618286a..040b79ad0b 100644 --- a/swift/container/backend.py +++ b/swift/container/backend.py @@ -15,6 +15,7 @@ """ Pluggable Back-ends for Container Server """ +import errno import os from uuid import uuid4 @@ -23,16 +24,45 @@ import six import six.moves.cPickle as pickle from six.moves import range import sqlite3 +from eventlet import tpool +from swift.common.constraints import CONTAINER_LISTING_LIMIT +from swift.common.exceptions import LockTimeout from swift.common.utils import Timestamp, encode_timestamps, \ - decode_timestamps, extract_swift_bytes -from swift.common.db import DatabaseBroker, utf8encode - + decode_timestamps, extract_swift_bytes, storage_directory, hash_path, \ + ShardRange, renamer, find_shard_range, MD5_OF_EMPTY_STRING, mkdirs, \ + get_db_files, parse_db_filename, make_db_file_path, split_path +from swift.common.db import DatabaseBroker, utf8encode, BROKER_TIMEOUT, \ + zero_like, DatabaseAlreadyExists SQLITE_ARG_LIMIT = 999 DATADIR = 'containers' +RECORD_TYPE_OBJECT = 'object' +RECORD_TYPE_SHARD = 'shard' +SHARD_RANGE_TABLE = 'shard_range' + +NOTFOUND = 'not_found' +UNSHARDED = 'unsharded' +SHARDING = 'sharding' +SHARDED = 'sharded' +COLLAPSED = 'collapsed' + + +SHARD_STATS_STATES = [ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING] +SHARD_LISTING_STATES = SHARD_STATS_STATES + [ShardRange.CLEAVED] +SHARD_UPDATE_STATES = [ShardRange.CREATED, ShardRange.CLEAVED, + ShardRange.ACTIVE, ShardRange.SHARDING] + + +# attribute names in order used 
when transforming shard ranges from dicts to +# tuples and vice-versa +SHARD_RANGE_KEYS = ('name', 'timestamp', 'lower', 'upper', 'object_count', + 'bytes_used', 'meta_timestamp', 'deleted', 'state', + 'state_timestamp', 'epoch') + POLICY_STAT_TABLE_CREATE = ''' CREATE TABLE policy_stat ( storage_policy_index INTEGER PRIMARY KEY, @@ -221,12 +251,220 @@ def update_new_item_from_existing(new_item, existing): return any(newer_than_existing) +def merge_shards(shard_data, existing): + """ + Compares ``shard_data`` with ``existing`` and updates ``shard_data`` with + any items of ``existing`` that take precedence over the corresponding item + in ``shard_data``. + + :param shard_data: a dict representation of shard range that may be + modified by this method. + :param existing: a dict representation of shard range. + :returns: True if ``shard data`` has any item(s) that are considered to + take precedence over the corresponding item in ``existing`` + """ + if not existing: + return True + if existing['timestamp'] < shard_data['timestamp']: + # note that currently we do not roll forward any meta or state from + # an item that was created at older time, newer created time trumps + return True + elif existing['timestamp'] > shard_data['timestamp']: + return False + + new_content = False + # timestamp must be the same, so preserve existing range bounds and deleted + for k in ('lower', 'upper', 'deleted'): + shard_data[k] = existing[k] + + # now we need to look for meta data updates + if existing['meta_timestamp'] >= shard_data['meta_timestamp']: + for k in ('object_count', 'bytes_used', 'meta_timestamp'): + shard_data[k] = existing[k] + else: + new_content = True + + if (existing['state_timestamp'] == shard_data['state_timestamp'] + and shard_data['state'] > existing['state']): + new_content = True + elif existing['state_timestamp'] >= shard_data['state_timestamp']: + for k in ('state', 'state_timestamp', 'epoch'): + shard_data[k] = existing[k] + else: + new_content = True + return new_content + + class ContainerBroker(DatabaseBroker): - """Encapsulates working with a container database.""" + """ + Encapsulates working with a container database. + + Note that this may involve multiple on-disk DB files if the container + becomes sharded: + + * :attr:`_db_file` is the path to the legacy container DB name, i.e. + ``.db``. This file should exist for an initialised broker that + has never been sharded, but will not exist once a container has been + sharded. + * :attr:`db_files` is a list of existing db files for the broker. This + list should have at least one entry for an initialised broker, and + should have two entries while a broker is in SHARDING state. + * :attr:`db_file` is the path to whichever db is currently authoritative + for the container. Depending on the container's state, this may not be + the same as the ``db_file`` argument given to :meth:`~__init__`, unless + ``force_db_file`` is True in which case :attr:`db_file` is always equal + to the ``db_file`` argument given to :meth:`~__init__`. + * :attr:`pending_file` is always equal to :attr:`_db_file` extended with + ``.pending``, i.e. ``.db.pending``. 
+ """ db_type = 'container' db_contains_type = 'object' db_reclaim_timestamp = 'created_at' + def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None, + account=None, container=None, pending_timeout=None, + stale_reads_ok=False, skip_commits=False, + force_db_file=False): + self._init_db_file = db_file + if db_file == ':memory:': + base_db_file = db_file + else: + db_dir = os.path.dirname(db_file) + hash_, other, ext = parse_db_filename(db_file) + base_db_file = os.path.join(db_dir, hash_ + ext) + super(ContainerBroker, self).__init__( + base_db_file, timeout, logger, account, container, pending_timeout, + stale_reads_ok, skip_commits=skip_commits) + # the root account and container are populated on demand + self._root_account = self._root_container = None + self._force_db_file = force_db_file + self._db_files = None + + @classmethod + def create_broker(self, device_path, part, account, container, logger=None, + epoch=None, put_timestamp=None, + storage_policy_index=None): + """ + Create a ContainerBroker instance. If the db doesn't exist, initialize + the db file. + + :param device_path: device path + :param part: partition number + :param account: account name string + :param container: container name string + :param logger: a logger instance + :param epoch: a timestamp to include in the db filename + :param put_timestamp: initial timestamp if broker needs to be + initialized + :param storage_policy_index: the storage policy index + :return: a :class:`swift.container.backend.ContainerBroker` instance + """ + hsh = hash_path(account, container) + db_dir = storage_directory(DATADIR, part, hsh) + db_path = os.path.join(device_path, db_dir, hsh + '.db') + if epoch: + db_path = make_db_file_path(db_path, epoch) + broker = ContainerBroker(db_path, account=account, container=container, + logger=logger) + if not os.path.exists(broker.db_file): + try: + broker.initialize(put_timestamp, storage_policy_index) + except DatabaseAlreadyExists: + pass + return broker + + def get_db_state(self): + """ + Returns the current state of on disk db files. + """ + if self._db_file == ':memory:': + return UNSHARDED + if not self.db_files: + return NOTFOUND + if len(self.db_files) > 1: + return SHARDING + if self.db_epoch is None: + # never been sharded + return UNSHARDED + if self.db_epoch != self._own_shard_range().epoch: + return UNSHARDED + if not self.get_shard_ranges(): + return COLLAPSED + return SHARDED + + def sharding_initiated(self): + """ + Returns True if a broker has shard range state that would be necessary + for sharding to have been initiated, False otherwise. + """ + own_shard_range = self.get_own_shard_range() + if own_shard_range.state in (ShardRange.SHARDING, + ShardRange.SHRINKING, + ShardRange.SHARDED): + return bool(self.get_shard_ranges()) + return False + + def sharding_required(self): + """ + Returns True if a broker has shard range state that would be necessary + for sharding to have been initiated but has not yet completed sharding, + False otherwise. + """ + db_state = self.get_db_state() + return (db_state == SHARDING or + (db_state == UNSHARDED and self.sharding_initiated())) + + def is_sharded(self): + return self.get_db_state() == SHARDED + + def reload_db_files(self): + """ + Reloads the cached list of valid on disk db files for this broker. 
+ """ + if self._db_file == ':memory:': + return + # reset connection so the next access will use the correct DB file + self.conn = None + self._db_files = get_db_files(self._init_db_file) + + @property + def db_files(self): + """ + Gets the cached list of valid db files that exist on disk for this + broker. + + The cached list may be refreshed by calling + :meth:`~swift.container.backend.ContainerBroker.reload_db_files`. + + :return: A list of paths to db files ordered by ascending epoch; + the list may be empty. + """ + if not self._db_files: + self.reload_db_files() + return self._db_files + + @property + def db_file(self): + """ + Get the path to the primary db file for this broker. This is typically + the db file for the most recent sharding epoch. However, if no db files + exist on disk, or if ``force_db_file`` was True when the broker was + constructed, then the primary db file is the file passed to the broker + constructor. + + :return: A path to a db file; the file does not necessarily exist. + """ + if self._force_db_file: + return self._init_db_file + if self.db_files: + return self.db_files[-1] + return self._init_db_file + + @property + def db_epoch(self): + hash_, epoch, ext = parse_db_filename(self.db_file) + return epoch + @property def storage_policy_index(self): if not hasattr(self, '_storage_policy_index'): @@ -234,6 +472,11 @@ class ContainerBroker(DatabaseBroker): self.get_info()['storage_policy_index'] return self._storage_policy_index + @property + def path(self): + self._populate_instance_cache() + return '%s/%s' % (self.account, self.container) + def _initialize(self, conn, put_timestamp, storage_policy_index): """ Create a brand new container database (tables, indices, triggers, etc.) @@ -250,6 +493,8 @@ class ContainerBroker(DatabaseBroker): self.create_policy_stat_table(conn, storage_policy_index) self.create_container_info_table(conn, put_timestamp, storage_policy_index) + self.create_shard_range_table(conn) + self._db_files = None def create_object_table(self, conn): """ @@ -331,6 +576,40 @@ class ContainerBroker(DatabaseBroker): VALUES (?) """, (storage_policy_index,)) + def create_shard_range_table(self, conn): + """ + Create the shard_range table which is specific to the container DB. + + :param conn: DB connection object + """ + # Use execute (not executescript) so we get the benefits of our + # GreenDBConnection. Creating a table requires a whole-DB lock; + # *any* in-progress cursor will otherwise trip a "database is locked" + # error. 
+ conn.execute(""" + CREATE TABLE %s ( + ROWID INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT, + timestamp TEXT, + lower TEXT, + upper TEXT, + object_count INTEGER DEFAULT 0, + bytes_used INTEGER DEFAULT 0, + meta_timestamp TEXT, + deleted INTEGER DEFAULT 0, + state INTEGER, + state_timestamp TEXT, + epoch TEXT + ); + """ % SHARD_RANGE_TABLE) + + conn.execute(""" + CREATE TRIGGER shard_range_update BEFORE UPDATE ON %s + BEGIN + SELECT RAISE(FAIL, 'UPDATE not allowed; DELETE and INSERT'); + END; + """ % SHARD_RANGE_TABLE) + def get_db_version(self, conn): if self._db_version == -1: self._db_version = 0 @@ -340,6 +619,11 @@ class ContainerBroker(DatabaseBroker): self._db_version = 1 return self._db_version + def _get_deleted_key(self, connection): + if self.get_db_version(connection) < 1: + return '+deleted' + return 'deleted' + def _newid(self, conn): conn.execute(''' UPDATE container_stat @@ -383,12 +667,7 @@ class ContainerBroker(DatabaseBroker): 'ctype_timestamp': content_type_timestamp, 'meta_timestamp': meta_timestamp}) - def empty(self): - """ - Check if container DB is empty. - - :returns: True if the database has no active objects, False otherwise - """ + def _empty(self): self._commit_puts_stale_ok() with self.get() as conn: try: @@ -401,7 +680,27 @@ class ContainerBroker(DatabaseBroker): raise row = conn.execute( 'SELECT object_count from container_stat').fetchone() - return (row[0] == 0) + return zero_like(row[0]) + + def empty(self): + """ + Check if container DB is empty. + + This method uses more stringent checks on object count than + :meth:`is_deleted`: this method checks that there are no objects in any + policy; if the container is in the process of sharding then both fresh + and retiring databases are checked to be empty; if a root container has + shard ranges then they are checked to be empty. + + :returns: True if the database has no active objects, False otherwise + """ + if not all(broker._empty() for broker in self.get_brokers()): + return False + if self.is_root_container() and self.sharding_initiated(): + # sharded shards don't get updates from their shards so their shard + # usage should not be relied upon + return self.get_shard_usage()['object_count'] <= 0 + return True def delete_object(self, name, timestamp, storage_policy_index=0): """ @@ -447,6 +746,43 @@ class ContainerBroker(DatabaseBroker): 'meta_timestamp': meta_timestamp} self.put_record(record) + def remove_objects(self, lower, upper, max_row=None): + """ + Removes object records in the given namespace range from the object + table. + + Note that objects are removed regardless of their storage_policy_index. + + :param lower: defines the lower bound of object names that will be + removed; names greater than this value will be removed; names less + than or equal to this value will not be removed. + :param upper: defines the upper bound of object names that will be + removed; names less than or equal to this value will be removed; + names greater than this value will not be removed. The empty string + is interpreted as there being no upper bound. 
+ :param max_row: if specified only rows less than or equal to max_row + will be removed + """ + query_conditions = [] + query_args = [] + if max_row is not None: + query_conditions.append('ROWID <= ?') + query_args.append(str(max_row)) + if lower: + query_conditions.append('name > ?') + query_args.append(lower) + if upper: + query_conditions.append('name <= ?') + query_args.append(upper) + + query = 'DELETE FROM object WHERE deleted in (0, 1)' + if query_conditions: + query += ' AND ' + ' AND '.join(query_conditions) + + with self.get() as conn: + conn.execute(query, query_args) + conn.commit() + def _is_deleted_info(self, object_count, put_timestamp, delete_timestamp, **kwargs): """ @@ -457,12 +793,17 @@ class ContainerBroker(DatabaseBroker): # The container is considered deleted if the delete_timestamp # value is greater than the put_timestamp, and there are no # objects in the container. - return (object_count in (None, '', 0, '0')) and ( + return zero_like(object_count) and ( Timestamp(delete_timestamp) > Timestamp(put_timestamp)) def _is_deleted(self, conn): """ - Check container_stat view and evaluate info. + Check if the DB is considered to be deleted. + + This object count used in this check is the same as the container + object count that would be returned in the result of :meth:`get_info` + and exposed to a client i.e. it is based on the container_stat view for + the current storage policy index or relevant shard range usage. :param conn: database conn @@ -471,8 +812,21 @@ class ContainerBroker(DatabaseBroker): info = conn.execute(''' SELECT put_timestamp, delete_timestamp, object_count FROM container_stat''').fetchone() + info = dict(info) + info.update(self._get_alternate_object_stats()[1]) return self._is_deleted_info(**info) + def is_reclaimable(self, now, reclaim_age): + with self.get() as conn: + info = conn.execute(''' + SELECT put_timestamp, delete_timestamp + FROM container_stat''').fetchone() + if (Timestamp(now - reclaim_age) > + Timestamp(info['delete_timestamp']) > + Timestamp(info['put_timestamp'])): + return self.empty() + return False + def get_info_is_deleted(self): """ Get the is_deleted status and info for the container. 
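Note: the broker's view of its db state is derived from which epoch-suffixed db files exist on disk, via the utils helpers added earlier in this patch. A rough sketch of that relationship (the device path, hash and epoch values are hypothetical):

    from swift.common.utils import make_db_file_path, parse_db_filename

    old_path = '/srv/node/sdb1/containers/312/134/ab2134/ab2134.db'
    new_path = make_db_file_path(old_path, '1525354533.12345')
    # new_path ends with 'ab2134_1525354533.12345.db'
    parse_db_filename(new_path)  # -> ('ab2134', '1525354533.12345', '.db')
    # While both files exist, ContainerBroker.get_db_state() reports SHARDING;
    # once the legacy ab2134.db is unlinked it reports SHARDED, or COLLAPSED
    # if no shard ranges remain, assuming the filename epoch matches the
    # broker's own shard range epoch (see get_db_state() above).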
@@ -485,6 +839,73 @@ class ContainerBroker(DatabaseBroker): info = self.get_info() return info, self._is_deleted_info(**info) + def get_replication_info(self): + info = super(ContainerBroker, self).get_replication_info() + info['shard_max_row'] = self.get_max_row('shard_ranges') + return info + + def _do_get_info_query(self, conn): + data = None + trailing_sync = 'x_container_sync_point1, x_container_sync_point2' + trailing_pol = 'storage_policy_index' + errors = set() + while not data: + try: + data = conn.execute((''' + SELECT account, container, created_at, put_timestamp, + delete_timestamp, status_changed_at, + object_count, bytes_used, + reported_put_timestamp, reported_delete_timestamp, + reported_object_count, reported_bytes_used, hash, + id, %s, %s + FROM container_stat + ''') % (trailing_sync, trailing_pol)).fetchone() + except sqlite3.OperationalError as err: + err_msg = str(err) + if err_msg in errors: + # only attempt migration once + raise + errors.add(err_msg) + if 'no such column: storage_policy_index' in err_msg: + trailing_pol = '0 AS storage_policy_index' + elif 'no such column: x_container_sync_point' in err_msg: + trailing_sync = '-1 AS x_container_sync_point1, ' \ + '-1 AS x_container_sync_point2' + else: + raise + data = dict(data) + # populate instance cache + self._storage_policy_index = data['storage_policy_index'] + self.account = data['account'] + self.container = data['container'] + return data + + def _get_info(self): + self._commit_puts_stale_ok() + with self.get() as conn: + return self._do_get_info_query(conn) + + def _populate_instance_cache(self, conn=None): + # load cached instance attributes from the database if necessary + if self.container is None: + if conn: + self._do_get_info_query(conn) + else: + with self.get() as conn: + self._do_get_info_query(conn) + + def _get_alternate_object_stats(self): + state = self.get_db_state() + if state == SHARDING: + other_info = self.get_brokers()[0]._get_info() + stats = {'object_count': other_info['object_count'], + 'bytes_used': other_info['bytes_used']} + elif state == SHARDED and self.is_root_container(): + stats = self.get_shard_usage() + else: + stats = {} + return state, stats + def get_info(self): """ Get global data for the container. @@ -494,44 +915,14 @@ class ContainerBroker(DatabaseBroker): object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, reported_object_count, reported_bytes_used, hash, id, x_container_sync_point1, - x_container_sync_point2, and storage_policy_index. + x_container_sync_point2, and storage_policy_index, + db_state. 
""" - self._commit_puts_stale_ok() - with self.get() as conn: - data = None - trailing_sync = 'x_container_sync_point1, x_container_sync_point2' - trailing_pol = 'storage_policy_index' - errors = set() - while not data: - try: - data = conn.execute((''' - SELECT account, container, created_at, put_timestamp, - delete_timestamp, status_changed_at, - object_count, bytes_used, - reported_put_timestamp, reported_delete_timestamp, - reported_object_count, reported_bytes_used, hash, - id, %s, %s - FROM container_stat - ''') % (trailing_sync, trailing_pol)).fetchone() - except sqlite3.OperationalError as err: - err_msg = str(err) - if err_msg in errors: - # only attempt migration once - raise - errors.add(err_msg) - if 'no such column: storage_policy_index' in err_msg: - trailing_pol = '0 AS storage_policy_index' - elif 'no such column: x_container_sync_point' in err_msg: - trailing_sync = '-1 AS x_container_sync_point1, ' \ - '-1 AS x_container_sync_point2' - else: - raise - data = dict(data) - # populate instance cache - self._storage_policy_index = data['storage_policy_index'] - self.account = data['account'] - self.container = data['container'] - return data + data = self._get_info() + state, stats = self._get_alternate_object_stats() + data.update(stats) + data['db_state'] = state + return data def set_x_container_sync_points(self, sync_point1, sync_point2): with self.get() as conn: @@ -657,7 +1048,9 @@ class ContainerBroker(DatabaseBroker): conn.commit() def list_objects_iter(self, limit, marker, end_marker, prefix, delimiter, - path=None, storage_policy_index=0, reverse=False): + path=None, storage_policy_index=0, reverse=False, + include_deleted=False, since_row=None, + transform_func=None, all_policies=False): """ Get a list of objects sorted by name starting at marker onward, up to limit entries. Entries will begin with the prefix and will not @@ -672,10 +1065,29 @@ class ContainerBroker(DatabaseBroker): the path :param storage_policy_index: storage policy index for query :param reverse: reverse the result order. - + :param include_deleted: if True, include only deleted objects; if + False (default), include only undeleted objects; otherwise, include + both deleted and undeleted objects. + :param since_row: include only items whose ROWID is greater than + the given row id; by default all rows are included. + :param transform_func: an optional function that if given will be + called for each object to get a transformed version of the object + to include in the listing; should have same signature as + :meth:`~_transform_record`; defaults to :meth:`~_transform_record`. 
+ :param all_policies: if True, include objects for all storage policies + ignoring any value given for ``storage_policy_index`` :returns: list of tuples of (name, created_at, size, content_type, - etag) + etag, deleted) """ + if include_deleted is True: + deleted_arg = ' = 1' + elif include_deleted is False: + deleted_arg = ' = 0' + else: + deleted_arg = ' in (0, 1)' + + if transform_func is None: + transform_func = self._transform_record delim_force_gte = False (marker, end_marker, prefix, delimiter, path) = utf8encode( marker, end_marker, prefix, delimiter, path) @@ -695,60 +1107,71 @@ class ContainerBroker(DatabaseBroker): orig_marker = marker with self.get() as conn: results = [] + deleted_key = self._get_deleted_key(conn) + query_keys = ['name', 'created_at', 'size', 'content_type', + 'etag', deleted_key] while len(results) < limit: - query = '''SELECT name, created_at, size, content_type, etag - FROM object WHERE''' query_args = [] + query_conditions = [] if end_marker and (not prefix or end_marker < end_prefix): - query += ' name < ? AND' + query_conditions.append('name < ?') query_args.append(end_marker) elif prefix: - query += ' name < ? AND' + query_conditions.append('name < ?') query_args.append(end_prefix) if delim_force_gte: - query += ' name >= ? AND' + query_conditions.append('name >= ?') query_args.append(marker) # Always set back to False delim_force_gte = False elif marker and marker >= prefix: - query += ' name > ? AND' + query_conditions.append('name > ?') query_args.append(marker) elif prefix: - query += ' name >= ? AND' + query_conditions.append('name >= ?') query_args.append(prefix) - if self.get_db_version(conn) < 1: - query += ' +deleted = 0' - else: - query += ' deleted = 0' - orig_tail_query = ''' - ORDER BY name %s LIMIT ? - ''' % ('DESC' if reverse else '') - orig_tail_args = [limit - len(results)] + query_conditions.append(deleted_key + deleted_arg) + if since_row: + query_conditions.append('ROWID > ?') + query_args.append(since_row) + + def build_query(keys, conditions, args): + query = 'SELECT ' + ', '.join(keys) + ' FROM object ' + if conditions: + query += 'WHERE ' + ' AND '.join(conditions) + tail_query = ''' + ORDER BY name %s LIMIT ? + ''' % ('DESC' if reverse else '') + return query + tail_query, args + [limit - len(results)] + # storage policy filter - policy_tail_query = ''' - AND storage_policy_index = ? - ''' + orig_tail_query - policy_tail_args = [storage_policy_index] + orig_tail_args - tail_query, tail_args = \ - policy_tail_query, policy_tail_args + if all_policies: + query, args = build_query( + query_keys + ['storage_policy_index'], + query_conditions, + query_args) + else: + query, args = build_query( + query_keys + ['storage_policy_index'], + query_conditions + ['storage_policy_index = ?'], + query_args + [storage_policy_index]) try: - curs = conn.execute(query + tail_query, - tuple(query_args + tail_args)) + curs = conn.execute(query, tuple(args)) except sqlite3.OperationalError as err: if 'no such column: storage_policy_index' not in str(err): raise - tail_query, tail_args = \ - orig_tail_query, orig_tail_args - curs = conn.execute(query + tail_query, - tuple(query_args + tail_args)) + query, args = build_query( + query_keys + ['0 as storage_policy_index'], + query_conditions, query_args) + curs = conn.execute(query, tuple(args)) curs.row_factory = None # Delimiters without a prefix is ignored, further if there # is no delimiter then we can simply return the result as # prefixes are now handled in the SQL statement. 
if prefix is None or not delimiter: - return [self._transform_record(r) for r in curs] + return [transform_func(r) for r in curs] # We have a delimiter and a prefix (possibly empty string) to # handle @@ -787,19 +1210,51 @@ class ContainerBroker(DatabaseBroker): results.append([dir_name, '0', 0, None, '']) curs.close() break - results.append(self._transform_record(row)) + results.append(transform_func(row)) if not rowcount: break return results + def get_objects(self, limit=None, marker='', end_marker='', + include_deleted=None, since_row=None): + """ + Returns a list of objects, including deleted objects, in all policies. + Each object in the list is described by a dict with keys {'name', + 'created_at', 'size', 'content_type', 'etag', 'deleted', + 'storage_policy_index'}. + + :param limit: maximum number of entries to get + :param marker: if set, objects with names less than or equal to this + value will not be included in the list. + :param end_marker: if set, objects with names greater than or equal to + this value will not be included in the list. + :param include_deleted: if True, include only deleted objects; if + False, include only undeleted objects; otherwise (default), include + both deleted and undeleted objects. + :param since_row: include only items whose ROWID is greater than + the given row id; by default all rows are included. + :return: a list of dicts, each describing an object. + """ + + limit = CONTAINER_LISTING_LIMIT if limit is None else limit + return self.list_objects_iter( + limit, marker, end_marker, prefix=None, delimiter=None, path=None, + reverse=False, include_deleted=include_deleted, + transform_func=self._record_to_dict, since_row=since_row, + all_policies=True + ) + def _transform_record(self, record): """ - Decode the created_at timestamp into separate data, content-type and - meta timestamps and replace the created_at timestamp with the - metadata timestamp i.e. the last-modified time. + Returns a tuple of (name, last-modified time, size, content_type and + etag) for the given record. + + The given record's created_at timestamp is decoded into separate data, + content-type and meta timestamps and the metadata timestamp is used as + the last-modified time value. """ t_data, t_ctype, t_meta = decode_timestamps(record[1]) - return (record[0], t_meta.internal) + record[2:] + return (record[0], t_meta.internal) + record[2:5] def _record_to_dict(self, rec): if rec: @@ -822,7 +1277,7 @@ class ContainerBroker(DatabaseBroker): if isinstance(item['name'], six.text_type): item['name'] = item['name'].encode('utf-8') - def _really_merge_items(conn): + def _really_really_merge_items(conn): curs = conn.cursor() if self.get_db_version(conn) >= 1: query_mod = ' deleted IN (0, 1) AND ' @@ -885,6 +1340,9 @@ class ContainerBroker(DatabaseBroker): ''', (sync_point, source)) conn.commit() + def _really_merge_items(conn): + return tpool.execute(_really_really_merge_items, conn) + with self.get() as conn: try: return _really_merge_items(conn) @@ -894,6 +1352,86 @@ class ContainerBroker(DatabaseBroker): self._migrate_add_storage_policy(conn) return _really_merge_items(conn) + def merge_shard_ranges(self, shard_ranges): + """ + Merge shard ranges into the shard range table. + + :param shard_ranges: a shard range or a list of shard ranges; each + shard range should be an instance of + :class:`~swift.common.utils.ShardRange` or a dict representation of + a shard range having ``SHARD_RANGE_KEYS``. 
+ """ + if not shard_ranges: + return + if not isinstance(shard_ranges, list): + shard_ranges = [shard_ranges] + + item_list = [] + for item in shard_ranges: + if isinstance(item, ShardRange): + item = dict(item) + for col in ('name', 'lower', 'upper'): + if isinstance(item[col], six.text_type): + item[col] = item[col].encode('utf-8') + item_list.append(item) + + def _really_merge_items(conn): + curs = conn.cursor() + curs.execute('BEGIN IMMEDIATE') + + # Get rows for items that already exist. + # We must chunk it up to avoid sqlite's limit of 999 args. + records = {} + for offset in range(0, len(item_list), SQLITE_ARG_LIMIT): + chunk = [record['name'] for record + in item_list[offset:offset + SQLITE_ARG_LIMIT]] + records.update( + (rec[0], rec) for rec in curs.execute( + 'SELECT %s FROM %s ' + 'WHERE deleted IN (0, 1) AND name IN (%s)' % + (', '.join(SHARD_RANGE_KEYS), SHARD_RANGE_TABLE, + ','.join('?' * len(chunk))), chunk)) + + # Sort item_list into things that need adding and deleting + to_delete = {} + to_add = {} + for item in item_list: + item_ident = item['name'] + existing = records.get(item_ident) + if existing: + existing = dict(zip(SHARD_RANGE_KEYS, existing)) + if merge_shards(item, existing): + # exists with older timestamp + if item_ident in records: + to_delete[item_ident] = item + # duplicate entries in item_list + if (item_ident not in to_add or + merge_shards(item, to_add[item_ident])): + to_add[item_ident] = item + + if to_delete: + curs.executemany( + 'DELETE FROM %s WHERE deleted in (0, 1) ' + 'AND name = ?' % SHARD_RANGE_TABLE, + ((item_ident,) for item_ident in to_delete)) + if to_add: + vals = ','.join('?' * len(SHARD_RANGE_KEYS)) + curs.executemany( + 'INSERT INTO %s (%s) VALUES (%s)' % + (SHARD_RANGE_TABLE, ','.join(SHARD_RANGE_KEYS), vals), + tuple([item[k] for k in SHARD_RANGE_KEYS] + for item in to_add.values())) + conn.commit() + + with self.get() as conn: + try: + return _really_merge_items(conn) + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): + raise + self.create_shard_range_table(conn) + return _really_merge_items(conn) + def get_reconciler_sync(self): with self.get() as conn: try: @@ -1039,3 +1577,644 @@ class ContainerBroker(DatabaseBroker): ''' % (column_names, column_names) + CONTAINER_STAT_VIEW_SCRIPT + 'COMMIT;') + + def _reclaim(self, conn, age_timestamp, sync_timestamp): + super(ContainerBroker, self)._reclaim(conn, age_timestamp, + sync_timestamp) + # populate instance cache, but use existing conn to avoid deadlock + # when it has a pending update + self._populate_instance_cache(conn=conn) + try: + conn.execute(''' + DELETE FROM %s WHERE deleted = 1 AND timestamp < ? + AND name != ? + ''' % SHARD_RANGE_TABLE, (sync_timestamp, self.path)) + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): + raise + + def _get_shard_range_rows(self, connection=None, include_deleted=False, + states=None, exclude_states=None, + include_own=False, exclude_others=False): + """ + Returns a list of shard range rows. + + To get all shard ranges use ``include_own=True``. To get only the + broker's own shard range use ``include_own=True`` and + ``exclude_others=True``. + + :param connection: db connection + :param include_deleted: include rows marked as deleted + :param states: include only rows matching the given state(s); can be an + int or a list of ints. 
+ :param exclude_states: exclude rows matching the given state(s); can be + an int or a list of ints; takes precedence over ``state``. + :param include_own: boolean that governs whether the row whose name + matches the broker's path is included in the returned list. If + True, that row is included, otherwise it is not included. Default + is False. + :param exclude_others: boolean that governs whether the rows whose + names do not match the broker's path are included in the returned + list. If True, those rows are not included, otherwise they are + included. Default is False. + :return: a list of tuples. + """ + + if exclude_others and not include_own: + return [] + + def prep_states(states): + state_set = set() + if isinstance(states, (list, tuple, set)): + state_set.update(states) + elif states is not None: + state_set.add(states) + return state_set + + excluded_states = prep_states(exclude_states) + included_states = prep_states(states) + included_states -= excluded_states + + def do_query(conn): + try: + condition = '' + conditions = [] + params = [] + if not include_deleted: + conditions.append('deleted=0') + if included_states: + conditions.append('state in (%s)' % ','.join( + '?' * len(included_states))) + params.extend(included_states) + if excluded_states: + conditions.append('state not in (%s)' % ','.join( + '?' * len(excluded_states))) + params.extend(excluded_states) + if not include_own: + conditions.append('name != ?') + params.append(self.path) + if exclude_others: + conditions.append('name = ?') + params.append(self.path) + if conditions: + condition = ' WHERE ' + ' AND '.join(conditions) + sql = ''' + SELECT %s + FROM %s%s; + ''' % (', '.join(SHARD_RANGE_KEYS), SHARD_RANGE_TABLE, + condition) + data = conn.execute(sql, params) + data.row_factory = None + return [row for row in data] + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): + raise + return [] + + if connection: + return do_query(connection) + else: + with self.get() as conn: + return do_query(conn) + + @classmethod + def resolve_shard_range_states(cls, states): + """ + Given a list of values each of which may be the name of a state, the + number of a state, or an alias, return the set of state numbers + described by the list. + + The following alias values are supported: 'listing' maps to all states + that are considered valid when listing objects; 'updating' maps to all + states that are considered valid for redirecting an object update. + + :param states: a list of values each of which may be the name of a + state, the number of a state, or an alias + :return: a set of integer state numbers, or None if no states are given + :raises ValueError: if any value in the given list is neither a valid + state nor a valid alias + """ + if states: + resolved_states = set() + for state in states: + if state == 'listing': + resolved_states.update(SHARD_LISTING_STATES) + elif state == 'updating': + resolved_states.update(SHARD_UPDATE_STATES) + else: + resolved_states.add(ShardRange.resolve_state(state)[0]) + return resolved_states + return None + + def get_shard_ranges(self, marker=None, end_marker=None, includes=None, + reverse=False, include_deleted=False, states=None, + exclude_states=None, include_own=False, + exclude_others=False, fill_gaps=False): + """ + Returns a list of persisted shard ranges. + + :param marker: restricts the returned list to shard ranges whose + namespace includes or is greater than the marker value. 
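_get_shard_range_rows above assembles its WHERE clause from whichever filters apply, appending each condition and its bound parameters in step so the SQL stays fully parameterised. A standalone sketch of that conditional query assembly (table and column names are illustrative):

def build_query(include_deleted=False, states=None, own_name=None,
                include_own=False):
    # collect conditions and their bound parameters side by side
    conditions, params = [], []
    if not include_deleted:
        conditions.append('deleted = 0')
    if states:
        conditions.append('state IN (%s)' % ','.join('?' * len(states)))
        params.extend(states)
    if own_name and not include_own:
        conditions.append('name != ?')
        params.append(own_name)
    where = (' WHERE ' + ' AND '.join(conditions)) if conditions else ''
    return 'SELECT name, state FROM shard_range' + where, params

sql, params = build_query(states=[10, 20], own_name='a/c')
# -> "... WHERE deleted = 0 AND state IN (?,?) AND name != ?", [10, 20, 'a/c']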
+ :param end_marker: restricts the returned list to shard ranges whose + namespace includes or is less than the end_marker value. + :param includes: restricts the returned list to the shard range that + includes the given value; if ``includes`` is specified then + ``marker`` and ``end_marker`` are ignored. + :param reverse: reverse the result order. + :param include_deleted: include items that have the delete marker set + :param states: if specified, restricts the returned list to shard + ranges that have the given state(s); can be a list of ints or a + single int. + :param exclude_states: exclude rows matching the given state(s); can be + an int or a list of ints; takes precedence over ``state``. + :param include_own: boolean that governs whether the row whose name + matches the broker's path is included in the returned list. If + True, that row is included, otherwise it is not included. Default + is False. + :param exclude_others: boolean that governs whether the rows whose + names do not match the broker's path are included in the returned + list. If True, those rows are not included, otherwise they are + included. Default is False. + :param fill_gaps: if True, insert own shard range to fill any gaps in + at the tail of other shard ranges. + :return: a list of instances of :class:`swift.common.utils.ShardRange` + """ + def shard_range_filter(sr): + end = start = True + if end_marker: + end = end_marker > sr.lower + if marker: + start = marker < sr.upper + return start and end + + if reverse: + marker, end_marker = end_marker, marker + if marker and end_marker and marker >= end_marker: + return [] + + shard_ranges = [ + ShardRange(*row) + for row in self._get_shard_range_rows( + include_deleted=include_deleted, states=states, + exclude_states=exclude_states, include_own=include_own, + exclude_others=exclude_others)] + # note if this ever changes to *not* sort by upper first then it breaks + # a key assumption for bisect, which is used by utils.find_shard_ranges + shard_ranges.sort(key=lambda sr: (sr.upper, sr.state, sr.lower)) + if includes: + shard_range = find_shard_range(includes, shard_ranges) + return [shard_range] if shard_range else [] + + if reverse: + shard_ranges.reverse() + if marker or end_marker: + shard_ranges = list(filter(shard_range_filter, shard_ranges)) + + if fill_gaps: + if reverse: + if shard_ranges: + last_upper = shard_ranges[0].upper + else: + last_upper = marker or ShardRange.MIN + required_upper = end_marker or ShardRange.MAX + filler_index = 0 + else: + if shard_ranges: + last_upper = shard_ranges[-1].upper + else: + last_upper = marker or ShardRange.MIN + required_upper = end_marker or ShardRange.MAX + filler_index = len(shard_ranges) + if required_upper > last_upper: + filler_sr = self.get_own_shard_range() + filler_sr.lower = last_upper + filler_sr.upper = required_upper + shard_ranges.insert(filler_index, filler_sr) + + return shard_ranges + + def _own_shard_range(self, no_default=False): + shard_ranges = self.get_shard_ranges(include_own=True, + include_deleted=True, + exclude_others=True) + if shard_ranges: + own_shard_range = shard_ranges[0] + elif no_default: + return None + else: + own_shard_range = ShardRange( + self.path, Timestamp.now(), ShardRange.MIN, ShardRange.MAX, + state=ShardRange.ACTIVE) + return own_shard_range + + def get_own_shard_range(self, no_default=False): + """ + Returns a shard range representing this broker's own shard range. 
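The filtering in get_shard_ranges above keeps a range when its namespace overlaps the requested marker/end_marker window and, with fill_gaps, appends a filler covering anything between the last upper bound and the end of the requested namespace. A toy version over plain (lower, upper) pairs; the MIN/MAX sentinels are stand-ins for illustration, not Swift's ShardRange bounds:

MIN, MAX = '', '\xff' * 16  # toy sentinels for the namespace bounds

def select_ranges(ranges, marker=None, end_marker=None, fill_gaps=False):
    # ranges: sorted (lower, upper) pairs; lower exclusive, upper inclusive
    def overlaps(lower, upper):
        end = end_marker > lower if end_marker else True
        start = marker < upper if marker else True
        return start and end
    selected = [r for r in ranges if overlaps(*r)]
    if fill_gaps:
        last_upper = selected[-1][1] if selected else (marker or MIN)
        required_upper = end_marker or MAX
        if required_upper > last_upper:
            selected.append((last_upper, required_upper))  # filler range
    return selected

ranges = [('', 'm'), ('m', 't')]
assert select_ranges(ranges, marker='n') == [('m', 't')]
assert select_ranges(ranges, fill_gaps=True)[-1] == ('t', MAX)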
If no + such range has been persisted in the broker's shard ranges table then a + default shard range representing the entire namespace will be returned. + + The returned shard range will be updated with the current object stats + for this broker and a meta timestamp set to the current time. For these + values to be persisted the caller must merge the shard range. + + :param no_default: if True and the broker's own shard range is not + found in the shard ranges table then None is returned, otherwise a + default shard range is returned. + :return: an instance of :class:`~swift.common.utils.ShardRange` + """ + own_shard_range = self._own_shard_range(no_default=no_default) + if own_shard_range: + info = self.get_info() + own_shard_range.update_meta( + info['object_count'], info['bytes_used']) + return own_shard_range + + def is_own_shard_range(self, shard_range): + return shard_range.name == self.path + + def enable_sharding(self, epoch): + """ + Updates this broker's own shard range with the given epoch, sets its + state to SHARDING and persists it in the DB. + + :param epoch: a :class:`~swift.utils.common.Timestamp` + :return: the broker's updated own shard range. + """ + own_shard_range = self._own_shard_range() + own_shard_range.update_state(ShardRange.SHARDING, epoch) + own_shard_range.epoch = epoch + self.merge_shard_ranges(own_shard_range) + return own_shard_range + + def get_shard_usage(self): + """ + Get the aggregate object stats for all shard ranges in states ACTIVE, + SHARDING or SHRINKING. + + :return: a dict with keys {bytes_used, object_count} + """ + shard_ranges = self.get_shard_ranges(states=SHARD_STATS_STATES) + return {'bytes_used': sum(sr.bytes_used for sr in shard_ranges), + 'object_count': sum(sr.object_count for sr in shard_ranges)} + + def get_all_shard_range_data(self): + """ + Returns a list of all shard range data, including own shard range and + deleted shard ranges. + + :return: A list of dict representations of a ShardRange. + """ + shard_ranges = self.get_shard_ranges(include_deleted=True, + include_own=True) + return [dict(sr) for sr in shard_ranges] + + def set_sharding_state(self): + """ + Creates and initializes a fresh DB file in preparation for sharding a + retiring DB. The broker's own shard range must have an epoch timestamp + for this method to succeed. + + :return: True if the fresh DB was successfully created, False + otherwise. + """ + epoch = self.get_own_shard_range().epoch + if not epoch: + self.logger.warning("Container '%s' cannot be set to sharding " + "state: missing epoch", self.path) + return False + state = self.get_db_state() + if not state == UNSHARDED: + self.logger.warning("Container '%s' cannot be set to sharding " + "state while in %s state", self.path, state) + return False + + info = self.get_info() + # The tmp_dir is cleaned up by the replicators after reclaim_age, so if + # we initially create the fresh DB there, we will already have cleanup + # covered if there is an error. 
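get_own_shard_range above returns the persisted own-range row if one exists, otherwise a default spanning the whole namespace, and stamps it with the broker's current object stats; persisting the refreshed values is left to the caller. A simplified sketch of that lookup using plain dicts (ShardRange and its MIN/MAX bounds are replaced by stand-ins):

import time

UNBOUNDED = None  # stand-in for ShardRange.MIN/MAX style namespace bounds

def get_own_range(persisted, path, object_count, bytes_used,
                  no_default=False):
    # persisted: {name: range_dict} of rows previously merged into the table
    own = persisted.get(path)
    if own is None:
        if no_default:
            return None
        own = {'name': path, 'lower': UNBOUNDED, 'upper': UNBOUNDED,
               'state': 'active'}
    # stamp current stats and a fresh meta timestamp; the caller must merge
    # this dict back into the table for the update to be persisted
    own.update(object_count=object_count, bytes_used=bytes_used,
               meta_timestamp=time.time())
    return own

own = get_own_range({}, 'acct/cont', object_count=42, bytes_used=1024)
assert own['object_count'] == 42 and own['upper'] is UNBOUNDED
assert get_own_range({}, 'acct/cont', 0, 0, no_default=True) is None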
+ tmp_dir = os.path.join(self.get_device_path(), 'tmp') + if not os.path.exists(tmp_dir): + mkdirs(tmp_dir) + tmp_db_file = os.path.join(tmp_dir, "fresh%s.db" % str(uuid4())) + fresh_broker = ContainerBroker(tmp_db_file, self.timeout, self.logger, + self.account, self.container) + fresh_broker.initialize(info['put_timestamp'], + info['storage_policy_index']) + # copy relevant data from the retiring db to the fresh db + fresh_broker.update_metadata(self.metadata) + fresh_broker.merge_shard_ranges(self.get_all_shard_range_data()) + # copy sync points so that any peer in sync with retiring db will + # appear to be in sync with the fresh db, although the peer shouldn't + # attempt to replicate objects to a db with shard ranges. + for incoming in (True, False): + syncs = self.get_syncs(incoming) + fresh_broker.merge_syncs(syncs, incoming) + + max_row = self.get_max_row() + with fresh_broker.get() as fresh_broker_conn: + # Initialise the rowid to continue from where the retiring db ended + try: + sql = "INSERT into object " \ + "(ROWID, name, created_at, size, content_type, etag) " \ + "values (?, 'tmp_sharding', ?, 0, '', ?)" + fresh_broker_conn.execute( + sql, (max_row, Timestamp.now().internal, + MD5_OF_EMPTY_STRING)) + fresh_broker_conn.execute( + 'DELETE FROM object WHERE ROWID = ?', (max_row,)) + fresh_broker_conn.commit() + except sqlite3.OperationalError as err: + self.logger.error( + 'Failed to set the ROWID of the fresh database for %s: %s', + self.path, err) + return False + + # Set the created_at and hash in the container_info table the same + # in both brokers + try: + fresh_broker_conn.execute( + 'UPDATE container_stat SET created_at=?', + (info['created_at'],)) + fresh_broker_conn.commit() + except sqlite3.OperationalError as err: + self.logger.error('Failed to set matching created_at time in ' + 'the fresh database for %s: %s', + self.path, err) + return False + + # Rename to the new database + fresh_db_filename = make_db_file_path(self._db_file, epoch) + renamer(tmp_db_file, fresh_db_filename) + self.reload_db_files() + return True + + def set_sharded_state(self): + """ + Unlink's the broker's retiring DB file. + + :return: True if the retiring DB was successfully unlinked, False + otherwise. + """ + state = self.get_db_state() + if not state == SHARDING: + self.logger.warning("Container %r cannot be set to sharded " + "state while in %s state", + self.path, state) + return False + + self.reload_db_files() + if len(self.db_files) < 2: + self.logger.warning( + 'Refusing to delete db file for %r: no fresher db file found ' + 'in %r.', self.path, self.db_files) + return False + + retiring_file = self.db_files[-2] + try: + os.unlink(retiring_file) + self.logger.debug('Unlinked retiring db %r', retiring_file) + except OSError as err: + if err.errno != errno.ENOENT: + self.logger.exception('Failed to unlink %r' % self._db_file) + return False + + self.reload_db_files() + if len(self.db_files) >= 2: + self.logger.warning( + 'Still have multiple db files after unlinking %r: %r', + retiring_file, self.db_files) + return False + + return True + + def get_brokers(self): + """ + Return a list of brokers for component dbs. The list has two entries + while the db state is sharding: the first entry is a broker for the + retiring db with ``skip_commits`` set to ``True``; the second entry is + a broker for the fresh db with ``skip_commits`` set to ``False``. For + any other db state the list has one entry. 
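set_sharding_state above seeds the fresh database's row numbering by inserting a placeholder at the retiring database's max row and deleting it again, so rows merged later continue from that point instead of restarting at 1. The trick in isolation, assuming the table is declared with AUTOINCREMENT (as the container object table is), so SQLite records the high-water mark in sqlite_sequence and never reuses it:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE object '
             '(ROWID INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT)')

max_row = 1000  # pretend the retiring db's object table ended at ROWID 1000
# the deleted placeholder still advances the AUTOINCREMENT counter
conn.execute('INSERT INTO object (ROWID, name) VALUES (?, ?)',
             (max_row, 'tmp_sharding'))
conn.execute('DELETE FROM object WHERE ROWID = ?', (max_row,))
conn.commit()

# later inserts continue from the seeded high-water mark
conn.execute('INSERT INTO object (name) VALUES (?)', ('first real row',))
assert conn.execute('SELECT ROWID FROM object').fetchone()[0] == max_row + 1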
+ + :return: a list of :class:`~swift.container.backend.ContainerBroker` + """ + if len(self.db_files) > 2: + self.logger.warning('Unexpected db files will be ignored: %s' % + self.db_files[:-2]) + brokers = [] + db_files = self.db_files[-2:] + while db_files: + db_file = db_files.pop(0) + sub_broker = ContainerBroker( + db_file, self.timeout, self.logger, self.account, + self.container, self.pending_timeout, self.stale_reads_ok, + force_db_file=True, skip_commits=bool(db_files)) + brokers.append(sub_broker) + return brokers + + def set_sharding_sysmeta(self, key, value): + """ + Updates the broker's metadata metadata stored under the given key + prefixed with a sharding specific namespace. + + :param key: metadata key in the sharding metadata namespace. + :param value: metadata value + """ + self.update_metadata({'X-Container-Sysmeta-Shard-' + key: + (value, Timestamp.now().internal)}) + + def get_sharding_sysmeta(self, key=None): + """ + Returns sharding specific info from the broker's metadata. + + :param key: if given the value stored under ``key`` in the sharding + info will be returned. + :return: either a dict of sharding info or the value stored under + ``key`` in that dict. + """ + prefix = 'X-Container-Sysmeta-Shard-' + metadata = self.metadata + info = dict((k[len(prefix):], v[0]) for + k, v in metadata.items() if k.startswith(prefix)) + if key: + return info.get(key) + return info + + def _load_root_info(self): + """ + Load the root container name and account for the container represented + by this broker. + + The root container path, if set, is stored in sysmeta under the key + ``X-Container-Sysmeta-Shard-Root``. If this sysmeta is not set then the + container is considered to be a root container and ``_root_account`` + and ``_root_container`` are set equal to the broker ``account`` and + ``container`` attributes respectively. + + """ + path = self.get_sharding_sysmeta('Root') + if not path: + # Ensure account/container get populated + self._populate_instance_cache() + self._root_account = self.account + self._root_container = self.container + return + + try: + self._root_account, self._root_container = split_path( + '/' + path, 2, 2) + except ValueError: + raise ValueError("Expected X-Container-Sysmeta-Shard-Root to be " + "of the form 'account/container', got %r" % path) + + @property + def root_account(self): + if not self._root_account: + self._load_root_info() + return self._root_account + + @property + def root_container(self): + if not self._root_container: + self._load_root_info() + return self._root_container + + @property + def root_path(self): + return '%s/%s' % (self.root_account, self.root_container) + + def is_root_container(self): + """ + Returns True if this container is a root container, False otherwise. + + A root container is a container that is not a shard of another + container. + """ + self._populate_instance_cache() + return (self.root_account == self.account and + self.root_container == self.container) + + def _get_next_shard_range_upper(self, shard_size, last_upper=None): + """ + Returns the name of the object that is ``shard_size`` rows beyond + ``last_upper`` in the object table ordered by name. If ``last_upper`` + is not given then it defaults to the start of object table ordered by + name. + + :param last_upper: the upper bound of the last found shard range. + :return: an object name, or None if the number of rows beyond + ``last_upper`` is less than ``shard_size``. 
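set_sharding_sysmeta and get_sharding_sysmeta above keep sharding bookkeeping under a reserved sysmeta prefix and strip that prefix when reading it back. A dict-based sketch of the same prefix-namespacing; only the prefix string is taken from the code above, and a plain dict stands in for broker metadata:

PREFIX = 'X-Container-Sysmeta-Shard-'

def set_sharding_sysmeta(metadata, key, value, timestamp):
    # metadata maps header name -> (value, timestamp), as in the broker
    metadata[PREFIX + key] = (value, timestamp)

def get_sharding_sysmeta(metadata, key=None):
    info = {k[len(PREFIX):]: v[0]
            for k, v in metadata.items() if k.startswith(PREFIX)}
    return info.get(key) if key else info

meta = {}
set_sharding_sysmeta(meta, 'Root', 'acct/cont', '0000001234.00000')
assert get_sharding_sysmeta(meta, 'Root') == 'acct/cont'
assert get_sharding_sysmeta(meta) == {'Root': 'acct/cont'}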
+ """ + self._commit_puts_stale_ok() + with self.get() as connection: + sql = ('SELECT name FROM object WHERE %s=0 ' % + self._get_deleted_key(connection)) + args = [] + if last_upper: + sql += "AND name > ? " + args.append(str(last_upper)) + sql += "ORDER BY name LIMIT 1 OFFSET %d" % (shard_size - 1) + row = connection.execute(sql, args).fetchone() + return row['name'] if row else None + + def find_shard_ranges(self, shard_size, limit=-1, existing_ranges=None): + """ + Scans the container db for shard ranges. Scanning will start at the + upper bound of the any ``existing_ranges`` that are given, otherwise + at ``ShardRange.MIN``. Scanning will stop when ``limit`` shard ranges + have been found or when no more shard ranges can be found. In the + latter case, the upper bound of the final shard range will be equal to + the upper bound of the container namespace. + + This method does not modify the state of the db; callers are + responsible for persisting any shard range data in the db. + + :param shard_size: the size of each shard range + :param limit: the maximum number of shard points to be found; a + negative value (default) implies no limit. + :param existing_ranges: an optional list of existing ShardRanges; if + given, this list should be sorted in order of upper bounds; the + scan for new shard ranges will start at the upper bound of the last + existing ShardRange. + :return: a tuple; the first value in the tuple is a list of + dicts each having keys {'index', 'lower', 'upper', 'object_count'} + in order of ascending 'upper'; the second value in the tuple is a + boolean which is True if the last shard range has been found, False + otherwise. + """ + existing_ranges = existing_ranges or [] + object_count = self.get_info().get('object_count', 0) + if shard_size >= object_count: + # container not big enough to shard + return [], False + + own_shard_range = self.get_own_shard_range() + progress = 0 + progress_reliable = True + # update initial state to account for any existing shard ranges + if existing_ranges: + if all([sr.state == ShardRange.FOUND + for sr in existing_ranges]): + progress = sum([sr.object_count for sr in existing_ranges]) + else: + # else: object count in existing shard ranges may have changed + # since they were found so progress cannot be reliably + # calculated; use default progress of zero - that's ok, + # progress is used for optimisation not correctness + progress_reliable = False + last_shard_upper = existing_ranges[-1].upper + if last_shard_upper >= own_shard_range.upper: + # == implies all ranges were previously found + # > implies an acceptor range has been set into which this + # shard should cleave itself + return [], True + else: + last_shard_upper = own_shard_range.lower + + found_ranges = [] + sub_broker = self.get_brokers()[0] + index = len(existing_ranges) + while limit < 0 or len(found_ranges) < limit: + if progress + shard_size >= object_count: + # next shard point is at or beyond final object name so don't + # bother with db query + next_shard_upper = None + else: + try: + next_shard_upper = sub_broker._get_next_shard_range_upper( + shard_size, last_shard_upper) + except (sqlite3.OperationalError, LockTimeout): + self.logger.exception( + "Problem finding shard upper in %r: " % self.db_file) + break + + if (next_shard_upper is None or + next_shard_upper > own_shard_range.upper): + # We reached the end of the container namespace, or possibly + # beyond if the container has misplaced objects. 
In either case + # limit the final shard range to own_shard_range.upper. + next_shard_upper = own_shard_range.upper + if progress_reliable: + # object count may include misplaced objects so the final + # shard size may not be accurate until cleaved, but at + # least the sum of shard sizes will equal the unsharded + # object_count + shard_size = object_count - progress + + # NB shard ranges are created with a non-zero object count so that + # the apparent container object count remains constant, and the + # container is non-deletable while shards have been found but not + # yet cleaved + found_ranges.append( + {'index': index, + 'lower': str(last_shard_upper), + 'upper': str(next_shard_upper), + 'object_count': shard_size}) + + if next_shard_upper == own_shard_range.upper: + return found_ranges, True + + progress += shard_size + last_shard_upper = next_shard_upper + index += 1 + + return found_ranges, False diff --git a/swift/container/replicator.py b/swift/container/replicator.py index 41c048716d..b326ab70e3 100644 --- a/swift/container/replicator.py +++ b/swift/container/replicator.py @@ -26,11 +26,10 @@ from swift.container.reconciler import ( get_reconciler_container_name, get_row_to_q_entry_translator) from swift.common import db_replicator from swift.common.storage_policy import POLICIES +from swift.common.swob import HTTPOk, HTTPAccepted from swift.common.exceptions import DeviceUnavailable from swift.common.http import is_success -from swift.common.db import DatabaseAlreadyExists -from swift.common.utils import (Timestamp, hash_path, - storage_directory, majority_size) +from swift.common.utils import Timestamp, majority_size, get_db_files class ContainerReplicator(db_replicator.Replicator): @@ -39,6 +38,10 @@ class ContainerReplicator(db_replicator.Replicator): datadir = DATADIR default_port = 6201 + def __init__(self, conf, logger=None): + super(ContainerReplicator, self).__init__(conf, logger=logger) + self.reconciler_cleanups = self.sync_store = None + def report_up_to_date(self, full_info): reported_key_map = { 'reported_put_timestamp': 'put_timestamp', @@ -61,8 +64,7 @@ class ContainerReplicator(db_replicator.Replicator): return sync_args def _handle_sync_response(self, node, response, info, broker, http, - different_region): - parent = super(ContainerReplicator, self) + different_region=False): if is_success(response.status): remote_info = json.loads(response.data) if incorrect_policy_index(info, remote_info): @@ -75,9 +77,50 @@ class ContainerReplicator(db_replicator.Replicator): if any(info[key] != remote_info[key] for key in sync_timestamps): broker.merge_timestamps(*(remote_info[key] for key in sync_timestamps)) - rv = parent._handle_sync_response( + + # Grab remote's shard ranges, too + self._fetch_and_merge_shard_ranges(http, broker) + + return super(ContainerReplicator, self)._handle_sync_response( node, response, info, broker, http, different_region) - return rv + + def _sync_shard_ranges(self, broker, http, local_id): + # TODO: currently the number of shard ranges is expected to be _much_ + # less than normal objects so all are sync'd on each cycle. However, in + # future there should be sync points maintained much like for object + # syncing so that only new shard range rows are sync'd. 
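The scanner above walks the object table in name order, asking for the name that sits shard_size rows past the previous boundary (LIMIT 1 OFFSET shard_size - 1) and repeating until the query comes back empty. A compact, runnable miniature of that scan over an in-memory table with illustrative object names:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE object (name TEXT, deleted INTEGER DEFAULT 0)')
conn.executemany('INSERT INTO object (name) VALUES (?)',
                 [('obj%03d' % i,) for i in range(10)])

def find_shard_boundaries(conn, shard_size):
    boundaries, last_upper = [], ''
    while True:
        row = conn.execute(
            'SELECT name FROM object WHERE deleted = 0 AND name > ? '
            'ORDER BY name LIMIT 1 OFFSET ?',
            (last_upper, shard_size - 1)).fetchone()
        if row is None:
            break  # fewer than shard_size rows remain
        boundaries.append(row[0])
        last_upper = row[0]
    return boundaries

# 10 objects split every 4th name: two full shards plus a small tail
assert find_shard_boundaries(conn, 4) == ['obj003', 'obj007']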
+ shard_range_data = broker.get_all_shard_range_data() + if shard_range_data: + if not self._send_replicate_request( + http, 'merge_shard_ranges', shard_range_data, local_id): + return False + self.logger.debug('%s synced %s shard ranges to %s', + broker.db_file, len(shard_range_data), + '%(ip)s:%(port)s/%(device)s' % http.node) + return True + + def _choose_replication_mode(self, node, rinfo, info, local_sync, broker, + http, different_region): + # Always replicate shard ranges + shard_range_success = self._sync_shard_ranges(broker, http, info['id']) + if broker.sharding_initiated(): + self.logger.warning( + '%s is able to shard -- refusing to replicate objects to peer ' + '%s; have shard ranges and will wait for cleaving', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % node) + self.stats['deferred'] += 1 + return shard_range_success + + success = super(ContainerReplicator, self)._choose_replication_mode( + node, rinfo, info, local_sync, broker, http, + different_region) + return shard_range_success and success + + def _fetch_and_merge_shard_ranges(self, http, broker): + response = http.replicate('get_shard_ranges') + if is_success(response.status): + broker.merge_shard_ranges(json.loads(response.data)) def find_local_handoff_for_part(self, part): """ @@ -114,15 +157,10 @@ class ContainerReplicator(db_replicator.Replicator): raise DeviceUnavailable( 'No mounted devices found suitable to Handoff reconciler ' 'container %s in partition %s' % (container, part)) - hsh = hash_path(account, container) - db_dir = storage_directory(DATADIR, part, hsh) - db_path = os.path.join(self.root, node['device'], db_dir, hsh + '.db') - broker = ContainerBroker(db_path, account=account, container=container) - if not os.path.exists(broker.db_file): - try: - broker.initialize(timestamp, 0) - except DatabaseAlreadyExists: - pass + broker = ContainerBroker.create_broker( + os.path.join(self.root, node['device']), part, account, container, + logger=self.logger, put_timestamp=timestamp, + storage_policy_index=0) if self.reconciler_containers is not None: self.reconciler_containers[container] = part, broker, node['id'] return broker @@ -207,6 +245,18 @@ class ContainerReplicator(db_replicator.Replicator): # replication broker.update_reconciler_sync(max_sync) + def cleanup_post_replicate(self, broker, orig_info, responses): + debug_template = 'Not deleting db %s (%%s)' % broker.db_file + if broker.sharding_required(): + # despite being a handoff, since we're sharding we're not going to + # do any cleanup so we can continue cleaving - this is still + # considered "success" + reason = 'requires sharding, state %s' % broker.get_db_state() + self.logger.debug(debug_template, reason) + return True + return super(ContainerReplicator, self).cleanup_post_replicate( + broker, orig_info, responses) + def delete_db(self, broker): """ Ensure that reconciler databases are only cleaned up at the end of the @@ -217,12 +267,13 @@ class ContainerReplicator(db_replicator.Replicator): # this container shouldn't be here, make sure it's cleaned up self.reconciler_cleanups[broker.container] = broker return - try: - # DB is going to get deleted. Be preemptive about it - self.sync_store.remove_synced_container(broker) - except Exception: - self.logger.exception('Failed to remove sync_store entry %s' % - broker.db_file) + if self.sync_store: + try: + # DB is going to get deleted. 
Be preemptive about it + self.sync_store.remove_synced_container(broker) + except Exception: + self.logger.exception('Failed to remove sync_store entry %s' % + broker.db_file) return super(ContainerReplicator, self).delete_db(broker) @@ -259,9 +310,20 @@ class ContainerReplicator(db_replicator.Replicator): self.replicate_reconcilers() return rv + def _in_sync(self, rinfo, info, broker, local_sync): + # TODO: don't always sync shard ranges! + if broker.get_shard_ranges(include_own=True, include_deleted=True): + return False + + return super(ContainerReplicator, self)._in_sync( + rinfo, info, broker, local_sync) + class ContainerReplicatorRpc(db_replicator.ReplicatorRpc): + def _db_file_exists(self, db_path): + return bool(get_db_files(db_path)) + def _parse_sync_args(self, args): parent = super(ContainerReplicatorRpc, self) remote_info = parent._parse_sync_args(args) @@ -289,3 +351,27 @@ class ContainerReplicatorRpc(db_replicator.ReplicatorRpc): timestamp=status_changed_at) info = broker.get_replication_info() return info + + def _abort_rsync_then_merge(self, db_file, old_filename): + if super(ContainerReplicatorRpc, self)._abort_rsync_then_merge( + db_file, old_filename): + return True + # if the local db has started sharding since the original 'sync' + # request then abort object replication now; instantiate a fresh broker + # each time this check if performed so to get latest state + broker = ContainerBroker(db_file) + return broker.sharding_initiated() + + def _post_rsync_then_merge_hook(self, existing_broker, new_broker): + # Note the following hook will need to change to using a pointer and + # limit in the future. + new_broker.merge_shard_ranges( + existing_broker.get_all_shard_range_data()) + + def merge_shard_ranges(self, broker, args): + broker.merge_shard_ranges(args[0]) + return HTTPAccepted() + + def get_shard_ranges(self, broker, args): + return HTTPOk(headers={'Content-Type': 'application/json'}, + body=json.dumps(broker.get_all_shard_range_data())) diff --git a/swift/container/server.py b/swift/container/server.py index c7df07ac8e..48a8d2c2e9 100644 --- a/swift/container/server.py +++ b/swift/container/server.py @@ -24,7 +24,8 @@ from eventlet import Timeout import swift.common.db from swift.container.sync_store import ContainerSyncStore -from swift.container.backend import ContainerBroker, DATADIR +from swift.container.backend import ContainerBroker, DATADIR, \ + RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, SHARD_UPDATE_STATES from swift.container.replicator import ContainerReplicatorRpc from swift.common.db import DatabaseAlreadyExists from swift.common.container_sync_realms import ContainerSyncRealms @@ -33,7 +34,8 @@ from swift.common.request_helpers import get_param, \ from swift.common.utils import get_logger, hash_path, public, \ Timestamp, storage_directory, validate_sync_to, \ config_true_value, timing_stats, replication, \ - override_bytes_from_content_type, get_log_line + override_bytes_from_content_type, get_log_line, ShardRange, list_from_csv + from swift.common.constraints import valid_timestamp, check_utf8, check_drive from swift.common import constraints from swift.common.bufferedhttp import http_connect @@ -46,7 +48,7 @@ from swift.common.header_key_dict import HeaderKeyDict from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPConflict, \ HTTPCreated, HTTPInternalServerError, HTTPNoContent, HTTPNotFound, \ HTTPPreconditionFailed, HTTPMethodNotAllowed, Request, Response, \ - HTTPInsufficientStorage, HTTPException + HTTPInsufficientStorage, 
HTTPException, HTTPMovedPermanently def gen_resp_headers(info, is_deleted=False): @@ -72,6 +74,7 @@ def gen_resp_headers(info, is_deleted=False): 'X-Timestamp': Timestamp(info.get('created_at', 0)).normal, 'X-PUT-Timestamp': Timestamp( info.get('put_timestamp', 0)).normal, + 'X-Backend-Sharding-State': info.get('db_state', UNSHARDED), }) return headers @@ -261,6 +264,40 @@ class ContainerController(BaseStorageServer): self.logger.exception('Failed to update sync_store %s during %s' % (broker.db_file, method)) + def _redirect_to_shard(self, req, broker, obj_name): + """ + If the request indicates that it can accept a redirection, look for a + shard range that contains ``obj_name`` and if one exists return a + HTTPMovedPermanently response. + + :param req: an instance of :class:`~swift.common.swob.Request` + :param broker: a container broker + :param obj_name: an object name + :return: an instance of :class:`swift.common.swob.HTTPMovedPermanently` + if a shard range exists for the given ``obj_name``, otherwise None. + """ + if not config_true_value( + req.headers.get('x-backend-accept-redirect', False)): + return None + + shard_ranges = broker.get_shard_ranges( + includes=obj_name, states=SHARD_UPDATE_STATES) + if not shard_ranges: + return None + + # note: obj_name may be included in both a created sub-shard and its + # sharding parent. get_shard_ranges will return the created sub-shard + # in preference to the parent, which is the desired result. + containing_range = shard_ranges[0] + location = "/%s/%s" % (containing_range.name, obj_name) + headers = {'Location': location, + 'X-Backend-Redirect-Timestamp': + containing_range.timestamp.internal} + + # we do not want the host added to the location + req.environ['swift.leave_relative_location'] = True + return HTTPMovedPermanently(headers=headers, request=req) + @public @timing_stats() def DELETE(self, req): @@ -283,6 +320,11 @@ class ContainerController(BaseStorageServer): if not os.path.exists(broker.db_file): return HTTPNotFound() if obj: # delete object + # redirect if a shard range exists for the object name + redirect = self._redirect_to_shard(req, broker, obj) + if redirect: + return redirect + broker.delete_object(obj, req.headers.get('x-timestamp'), obj_policy_index) return HTTPNoContent(request=req) @@ -343,6 +385,40 @@ class ContainerController(BaseStorageServer): broker.update_status_changed_at(timestamp) return recreated + def _maybe_autocreate(self, broker, req_timestamp, account, + policy_index): + created = False + if account.startswith(self.auto_create_account_prefix) and \ + not os.path.exists(broker.db_file): + if policy_index is None: + raise HTTPBadRequest( + 'X-Backend-Storage-Policy-Index header is required') + try: + broker.initialize(req_timestamp.internal, policy_index) + except DatabaseAlreadyExists: + pass + else: + created = True + if not os.path.exists(broker.db_file): + raise HTTPNotFound() + return created + + def _update_metadata(self, req, broker, req_timestamp, method): + metadata = {} + metadata.update( + (key, (value, req_timestamp.internal)) + for key, value in req.headers.items() + if key.lower() in self.save_headers or + is_sys_or_user_meta('container', key)) + if metadata: + if 'X-Container-Sync-To' in metadata: + if 'X-Container-Sync-To' not in broker.metadata or \ + metadata['X-Container-Sync-To'][0] != \ + broker.metadata['X-Container-Sync-To'][0]: + broker.set_x_container_sync_points(-1, -1) + broker.update_metadata(metadata, validate_metadata=True) + self._update_sync_store(broker, method) + 
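_redirect_to_shard above only redirects when the client opts in via X-Backend-Accept-Redirect, then points it at the shard range whose namespace contains the object name, returning that range's path and timestamp in the response headers. A standalone sketch of the decision with plain dicts standing in for swob requests and ShardRange objects (the shard container name below is illustrative, and the real code accepts any truthy header value):

def redirect_for_object(headers, shard_ranges, obj_name):
    # shard_ranges: dicts with 'name', 'lower', 'upper' (lower exclusive,
    # upper inclusive) and 'timestamp', already filtered to update states
    if headers.get('X-Backend-Accept-Redirect', '').lower() != 'true':
        return None  # caller did not opt in to redirects
    for sr in shard_ranges:
        if sr['lower'] < obj_name <= sr['upper']:
            return {'Location': '/%s/%s' % (sr['name'], obj_name),
                    'X-Backend-Redirect-Timestamp': sr['timestamp']}
    return None  # no shard covers this name; handle the update locally

ranges = [{'name': '.shards_a/c-0', 'lower': '', 'upper': 'm',
           'timestamp': '0000001234.00000'}]
resp = redirect_for_object({'X-Backend-Accept-Redirect': 'true'},
                           ranges, 'kitten')
assert resp['Location'] == '/.shards_a/c-0/kitten'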
@public @timing_stats() def PUT(self, req): @@ -364,14 +440,13 @@ class ContainerController(BaseStorageServer): # obj put expects the policy_index header, default is for # legacy support during upgrade. obj_policy_index = requested_policy_index or 0 - if account.startswith(self.auto_create_account_prefix) and \ - not os.path.exists(broker.db_file): - try: - broker.initialize(req_timestamp.internal, obj_policy_index) - except DatabaseAlreadyExists: - pass - if not os.path.exists(broker.db_file): - return HTTPNotFound() + self._maybe_autocreate(broker, req_timestamp, account, + obj_policy_index) + # redirect if a shard exists for this object name + response = self._redirect_to_shard(req, broker, obj) + if response: + return response + broker.put_object(obj, req_timestamp.internal, int(req.headers['x-size']), req.headers['x-content-type'], @@ -380,6 +455,22 @@ class ContainerController(BaseStorageServer): req.headers.get('x-content-type-timestamp'), req.headers.get('x-meta-timestamp')) return HTTPCreated(request=req) + + record_type = req.headers.get('x-backend-record-type', '').lower() + if record_type == RECORD_TYPE_SHARD: + try: + # validate incoming data... + shard_ranges = [ShardRange.from_dict(sr) + for sr in json.loads(req.body)] + except (ValueError, KeyError, TypeError) as err: + return HTTPBadRequest('Invalid body: %r' % err) + created = self._maybe_autocreate(broker, req_timestamp, account, + requested_policy_index) + self._update_metadata(req, broker, req_timestamp, 'PUT') + if shard_ranges: + # TODO: consider writing the shard ranges into the pending + # file, but if so ensure an all-or-none semantic for the write + broker.merge_shard_ranges(shard_ranges) else: # put container if requested_policy_index is None: # use the default index sent by the proxy if available @@ -391,31 +482,18 @@ class ContainerController(BaseStorageServer): req_timestamp.internal, new_container_policy, requested_policy_index) - metadata = {} - metadata.update( - (key, (value, req_timestamp.internal)) - for key, value in req.headers.items() - if key.lower() in self.save_headers or - is_sys_or_user_meta('container', key)) - if 'X-Container-Sync-To' in metadata: - if 'X-Container-Sync-To' not in broker.metadata or \ - metadata['X-Container-Sync-To'][0] != \ - broker.metadata['X-Container-Sync-To'][0]: - broker.set_x_container_sync_points(-1, -1) - broker.update_metadata(metadata, validate_metadata=True) - if metadata: - self._update_sync_store(broker, 'PUT') + self._update_metadata(req, broker, req_timestamp, 'PUT') resp = self.account_update(req, account, container, broker) if resp: return resp - if created: - return HTTPCreated(request=req, - headers={'x-backend-storage-policy-index': - broker.storage_policy_index}) - else: - return HTTPAccepted(request=req, - headers={'x-backend-storage-policy-index': - broker.storage_policy_index}) + if created: + return HTTPCreated(request=req, + headers={'x-backend-storage-policy-index': + broker.storage_policy_index}) + else: + return HTTPAccepted(request=req, + headers={'x-backend-storage-policy-index': + broker.storage_policy_index}) @public @timing_stats(sample_rate=0.1) @@ -454,13 +532,18 @@ class ContainerController(BaseStorageServer): :params record: object entry record :returns: modified record """ - (name, created, size, content_type, etag) = record[:5] - if content_type is None: - return {'subdir': name.decode('utf8')} - response = {'bytes': size, 'hash': etag, 'name': name.decode('utf8'), - 'content_type': content_type} + if isinstance(record, ShardRange): + 
created = record.timestamp + response = dict(record) + else: + (name, created, size, content_type, etag) = record[:5] + if content_type is None: + return {'subdir': name.decode('utf8')} + response = { + 'bytes': size, 'hash': etag, 'name': name.decode('utf8'), + 'content_type': content_type} + override_bytes_from_content_type(response, logger=self.logger) response['last_modified'] = Timestamp(created).isoformat - override_bytes_from_content_type(response, logger=self.logger) return response @public @@ -494,12 +577,45 @@ class ContainerController(BaseStorageServer): pending_timeout=0.1, stale_reads_ok=True) info, is_deleted = broker.get_info_is_deleted() - resp_headers = gen_resp_headers(info, is_deleted=is_deleted) - if is_deleted: - return HTTPNotFound(request=req, headers=resp_headers) - container_list = broker.list_objects_iter( - limit, marker, end_marker, prefix, delimiter, path, - storage_policy_index=info['storage_policy_index'], reverse=reverse) + record_type = req.headers.get('x-backend-record-type', '').lower() + if record_type == 'auto' and info.get('db_state') in (SHARDING, + SHARDED): + record_type = 'shard' + if record_type == 'shard': + override_deleted = info and config_true_value( + req.headers.get('x-backend-override-deleted', False)) + resp_headers = gen_resp_headers( + info, is_deleted=is_deleted and not override_deleted) + if is_deleted and not override_deleted: + return HTTPNotFound(request=req, headers=resp_headers) + resp_headers['X-Backend-Record-Type'] = 'shard' + includes = get_param(req, 'includes') + states = get_param(req, 'states') + fill_gaps = False + if states: + states = list_from_csv(states) + fill_gaps = any(('listing' in states, 'updating' in states)) + try: + states = broker.resolve_shard_range_states(states) + except ValueError: + return HTTPBadRequest(request=req, body='Bad state') + include_deleted = config_true_value( + req.headers.get('x-backend-include-deleted', False)) + container_list = broker.get_shard_ranges( + marker, end_marker, includes, reverse, states=states, + include_deleted=include_deleted, fill_gaps=fill_gaps) + else: + resp_headers = gen_resp_headers(info, is_deleted=is_deleted) + if is_deleted: + return HTTPNotFound(request=req, headers=resp_headers) + resp_headers['X-Backend-Record-Type'] = 'object' + # Use the retired db while container is in process of sharding, + # otherwise use current db + src_broker = broker.get_brokers()[0] + container_list = src_broker.list_objects_iter( + limit, marker, end_marker, prefix, delimiter, path, + storage_policy_index=info['storage_policy_index'], + reverse=reverse) return self.create_listing(req, out_content_type, info, resp_headers, broker.metadata, container_list, container) @@ -562,20 +678,7 @@ class ContainerController(BaseStorageServer): if broker.is_deleted(): return HTTPNotFound(request=req) broker.update_put_timestamp(req_timestamp.internal) - metadata = {} - metadata.update( - (key, (value, req_timestamp.internal)) - for key, value in req.headers.items() - if key.lower() in self.save_headers or - is_sys_or_user_meta('container', key)) - if metadata: - if 'X-Container-Sync-To' in metadata: - if 'X-Container-Sync-To' not in broker.metadata or \ - metadata['X-Container-Sync-To'][0] != \ - broker.metadata['X-Container-Sync-To'][0]: - broker.set_x_container_sync_points(-1, -1) - broker.update_metadata(metadata, validate_metadata=True) - self._update_sync_store(broker, 'POST') + self._update_metadata(req, broker, req_timestamp, 'POST') return HTTPNoContent(request=req) def __call__(self, 
env, start_response): diff --git a/swift/container/sharder.py b/swift/container/sharder.py new file mode 100644 index 0000000000..06c2b6d9db --- /dev/null +++ b/swift/container/sharder.py @@ -0,0 +1,1568 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import errno +import json +import time +from collections import defaultdict +from random import random + +import os +import six +from eventlet import Timeout + +from swift.common import internal_client, db_replicator +from swift.common.constraints import check_drive +from swift.common.direct_client import (direct_put_container, + DirectClientException) +from swift.common.exceptions import DeviceUnavailable +from swift.common.ring.utils import is_local_device +from swift.common.utils import get_logger, config_true_value, \ + dump_recon_cache, whataremyips, Timestamp, ShardRange, GreenAsyncPile, \ + config_float_value, config_positive_int_value, \ + quorum_size, parse_override_options, Everything, config_auto_int_value +from swift.container.backend import ContainerBroker, \ + RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \ + SHARD_UPDATE_STATES +from swift.container.replicator import ContainerReplicator + + +def sharding_enabled(broker): + # NB all shards will by default have been created with + # X-Container-Sysmeta-Sharding set and will therefore be candidates for + # sharding, along with explicitly configured root containers. + sharding = broker.metadata.get('X-Container-Sysmeta-Sharding') + if sharding and config_true_value(sharding[0]): + return True + # if broker has been marked deleted it will have lost sysmeta, but we still + # need to process the broker (for example, to shrink any shard ranges) so + # fallback to checking if it has any shard ranges + if broker.get_shard_ranges(): + return True + return False + + +def make_shard_ranges(broker, shard_data, shards_account_prefix): + timestamp = Timestamp.now() + shard_ranges = [] + for data in shard_data: + # Make a copy so we don't mutate the original + kwargs = data.copy() + path = ShardRange.make_path( + shards_account_prefix + broker.root_account, + broker.root_container, broker.container, + timestamp, kwargs.pop('index')) + + shard_ranges.append(ShardRange(path, timestamp, **kwargs)) + return shard_ranges + + +def find_missing_ranges(shard_ranges): + """ + Find any ranges in the entire object namespace that are not covered by any + shard range in the given list. 
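find_missing_ranges, whose docstring appears above and whose body follows, sweeps consecutive shard ranges for stretches of the namespace that no range covers. The same sweep over plain (lower, upper) pairs, with toy sentinels standing in for the namespace bounds:

MIN, MAX = '', '\xff' * 16  # toy namespace bounds for illustration

def find_gaps(ranges):
    # ranges: list of (lower, upper) pairs sorted by upper
    if not ranges:
        return [(MIN, MAX)]
    gaps = []
    if ranges[0][0] > MIN:
        gaps.append((MIN, ranges[0][0]))        # gap before the first range
    for (_, upper), (lower, _) in zip(ranges, ranges[1:]):
        if upper < lower:
            gaps.append((upper, lower))         # gap between neighbours
    if ranges[-1][1] < MAX:
        gaps.append((ranges[-1][1], MAX))       # gap after the last range
    return gaps

assert find_gaps([(MIN, 'g'), ('m', MAX)]) == [('g', 'm')]
assert find_gaps([]) == [(MIN, MAX)]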
+ + :param shard_ranges: A list of :class:`~swift.utils.ShardRange` + :return: a list of missing ranges + """ + gaps = [] + if not shard_ranges: + return ((ShardRange.MIN, ShardRange.MAX),) + if shard_ranges[0].lower > ShardRange.MIN: + gaps.append((ShardRange.MIN, shard_ranges[0].lower)) + for first, second in zip(shard_ranges, shard_ranges[1:]): + if first.upper < second.lower: + gaps.append((first.upper, second.lower)) + if shard_ranges[-1].upper < ShardRange.MAX: + gaps.append((shard_ranges[-1].upper, ShardRange.MAX)) + return gaps + + +def find_overlapping_ranges(shard_ranges): + """ + Find all pairs of overlapping ranges in the given list. + + :param shard_ranges: A list of :class:`~swift.utils.ShardRange` + :return: a set of tuples, each tuple containing ranges that overlap with + each other. + """ + result = set() + for shard_range in shard_ranges: + overlapping = [sr for sr in shard_ranges + if shard_range != sr and shard_range.overlaps(sr)] + if overlapping: + overlapping.append(shard_range) + overlapping.sort() + result.add(tuple(overlapping)) + + return result + + +def is_sharding_candidate(shard_range, threshold): + return (shard_range.state == ShardRange.ACTIVE and + shard_range.object_count >= threshold) + + +def find_sharding_candidates(broker, threshold, shard_ranges=None): + # this should only execute on root containers; the goal is to find + # large shard containers that should be sharded. + # First cut is simple: assume root container shard usage stats are good + # enough to make decision. + # TODO: object counts may well not be the appropriate metric for + # deciding to shrink because a shard with low object_count may have a + # large number of deleted object rows that will need to be merged with + # a neighbour. We may need to expose row count as well as object count. + if shard_ranges is None: + shard_ranges = broker.get_shard_ranges(states=[ShardRange.ACTIVE]) + candidates = [] + for shard_range in shard_ranges: + if not is_sharding_candidate(shard_range, threshold): + continue + shard_range.update_state(ShardRange.SHARDING, + state_timestamp=Timestamp.now()) + shard_range.epoch = shard_range.state_timestamp + candidates.append(shard_range) + return candidates + + +def find_shrinking_candidates(broker, shrink_threshold, merge_size): + # this should only execute on root containers that have sharded; the + # goal is to find small shard containers that could be retired by + # merging with a neighbour. + # First cut is simple: assume root container shard usage stats are good + # enough to make decision; only merge with upper neighbour so that + # upper bounds never change (shard names include upper bound). + # TODO: object counts may well not be the appropriate metric for + # deciding to shrink because a shard with low object_count may have a + # large number of deleted object rows that will need to be merged with + # a neighbour. We may need to expose row count as well as object count. + shard_ranges = broker.get_shard_ranges() + own_shard_range = broker.get_own_shard_range() + if len(shard_ranges) == 1: + # special case to enable final shard to shrink into root + shard_ranges.append(own_shard_range) + + merge_pairs = {} + for donor, acceptor in zip(shard_ranges, shard_ranges[1:]): + if donor in merge_pairs: + # this range may already have been made an acceptor; if so then + # move on. In principle it might be that even after expansion + # this range and its donor(s) could all be merged with the next + # range. 
In practice it is much easier to reason about a single + # donor merging into a single acceptor. Don't fret - eventually + # all the small ranges will be retired. + continue + if (acceptor.name != own_shard_range.name and + acceptor.state != ShardRange.ACTIVE): + # don't shrink into a range that is not yet ACTIVE + continue + if donor.state not in (ShardRange.ACTIVE, ShardRange.SHRINKING): + # found? created? sharded? don't touch it + continue + + proposed_object_count = donor.object_count + acceptor.object_count + if (donor.state == ShardRange.SHRINKING or + (donor.object_count < shrink_threshold and + proposed_object_count < merge_size)): + # include previously identified merge pairs on presumption that + # following shrink procedure is idempotent + merge_pairs[acceptor] = donor + if donor.update_state(ShardRange.SHRINKING): + # Set donor state to shrinking so that next cycle won't use + # it as an acceptor; state_timestamp defines new epoch for + # donor and new timestamp for the expanded acceptor below. + donor.epoch = donor.state_timestamp = Timestamp.now() + if acceptor.lower != donor.lower: + # Update the acceptor container with its expanding state to + # prevent it treating objects cleaved from the donor + # as misplaced. + acceptor.lower = donor.lower + acceptor.timestamp = donor.state_timestamp + return merge_pairs + + +class CleavingContext(object): + def __init__(self, ref, cursor='', max_row=None, cleave_to_row=None, + last_cleave_to_row=None, cleaving_done=False, + misplaced_done=False, ranges_done=0, ranges_todo=0): + self.ref = ref + self._cursor = None + self.cursor = cursor + self.max_row = max_row + self.cleave_to_row = cleave_to_row + self.last_cleave_to_row = last_cleave_to_row + self.cleaving_done = cleaving_done + self.misplaced_done = misplaced_done + self.ranges_done = ranges_done + self.ranges_todo = ranges_todo + + def __iter__(self): + yield 'ref', self.ref + yield 'cursor', self.cursor + yield 'max_row', self.max_row + yield 'cleave_to_row', self.cleave_to_row + yield 'last_cleave_to_row', self.last_cleave_to_row + yield 'cleaving_done', self.cleaving_done + yield 'misplaced_done', self.misplaced_done + yield 'ranges_done', self.ranges_done + yield 'ranges_todo', self.ranges_todo + + def _encode(cls, value): + if value is not None and six.PY2 and isinstance(value, six.text_type): + return value.encode('utf-8') + return value + + @property + def cursor(self): + return self._cursor + + @cursor.setter + def cursor(self, value): + self._cursor = self._encode(value) + + @property + def marker(self): + return self.cursor + '\x00' + + @classmethod + def _make_ref(cls, broker): + return broker.get_info()['id'] + + @classmethod + def load(cls, broker): + """ + Returns a context dict for tracking the progress of cleaving this + broker's retiring DB. The context is persisted in sysmeta using a key + that is based off the retiring db id and max row. This form of + key ensures that a cleaving context is only loaded for a db that + matches the id and max row when the context was created; if a db is + modified such that its max row changes then a different context, or no + context, will be loaded. + + :return: A dict to which cleave progress metadata may be added. The + dict initially has a key ``ref`` which should not be modified by + any caller. 
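CleavingContext.load, described above, reloads cleaving progress under a key derived from the retiring db's id and refreshes the max row on every load, so a stale context is never applied to a different db. A minimal sketch of that store/reload cycle with a plain dict standing in for broker sysmeta and only the attributes needed to show the round trip:

import json

class Context(object):
    def __init__(self, ref, cursor='', max_row=None):
        self.ref, self.cursor, self.max_row = ref, cursor, max_row

    def store(self, sysmeta):
        # persisted under a key derived from the ref, as in store() above
        sysmeta['Context-' + self.ref] = json.dumps(
            {'ref': self.ref, 'cursor': self.cursor, 'max_row': self.max_row})

    @classmethod
    def load(cls, sysmeta, db_id, max_row):
        ref = db_id  # the ref is based on the retiring db's id
        data = json.loads(sysmeta.get('Context-' + ref, '{}'))
        data['ref'] = ref
        data['max_row'] = max_row  # always reflect the current max row
        return cls(**data)

sysmeta = {}
Context('db-123', cursor='obj042', max_row=100).store(sysmeta)
assert Context.load(sysmeta, 'db-123', max_row=100).cursor == 'obj042'
assert Context.load(sysmeta, 'db-456', max_row=5).cursor == ''  # other db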
+ """ + brokers = broker.get_brokers() + ref = cls._make_ref(brokers[0]) + data = brokers[-1].get_sharding_sysmeta('Context-' + ref) + data = json.loads(data) if data else {} + data['ref'] = ref + data['max_row'] = brokers[0].get_max_row() + return cls(**data) + + def store(self, broker): + broker.set_sharding_sysmeta('Context-' + self.ref, + json.dumps(dict(self))) + + def reset(self): + self.cursor = '' + self.ranges_done = 0 + self.ranges_todo = 0 + self.cleaving_done = False + self.misplaced_done = False + self.last_cleave_to_row = self.cleave_to_row + + def start(self): + self.cursor = '' + self.ranges_done = 0 + self.ranges_todo = 0 + self.cleaving_done = False + self.cleave_to_row = self.max_row + + def done(self): + return all((self.misplaced_done, self.cleaving_done, + self.max_row == self.cleave_to_row)) + + +DEFAULT_SHARD_CONTAINER_THRESHOLD = 10000000 +DEFAULT_SHARD_SHRINK_POINT = 25 +DEFAULT_SHARD_MERGE_POINT = 75 + + +class ContainerSharder(ContainerReplicator): + """Shards containers.""" + + def __init__(self, conf, logger=None): + logger = logger or get_logger(conf, log_route='container-sharder') + super(ContainerSharder, self).__init__(conf, logger=logger) + self.shards_account_prefix = ( + (conf.get('auto_create_account_prefix') or '.') + 'shards_') + + def percent_value(key, default): + try: + value = conf.get(key, default) + return config_float_value(value, 0, 100) / 100.0 + except ValueError as err: + raise ValueError("%s: %s" % (str(err), key)) + + self.shard_shrink_point = percent_value('shard_shrink_point', + DEFAULT_SHARD_SHRINK_POINT) + self.shrink_merge_point = percent_value('shard_shrink_merge_point', + DEFAULT_SHARD_MERGE_POINT) + self.shard_container_threshold = config_positive_int_value( + conf.get('shard_container_threshold', + DEFAULT_SHARD_CONTAINER_THRESHOLD)) + self.shrink_size = (self.shard_container_threshold * + self.shard_shrink_point) + self.merge_size = (self.shard_container_threshold * + self.shrink_merge_point) + self.split_size = self.shard_container_threshold // 2 + self.scanner_batch_size = config_positive_int_value( + conf.get('shard_scanner_batch_size', 10)) + self.cleave_batch_size = config_positive_int_value( + conf.get('cleave_batch_size', 2)) + self.cleave_row_batch_size = config_positive_int_value( + conf.get('cleave_row_batch_size', 10000)) + self.auto_shard = config_true_value(conf.get('auto_shard', False)) + self.sharding_candidates = [] + self.recon_candidates_limit = int( + conf.get('recon_candidates_limit', 5)) + self.broker_timeout = config_positive_int_value( + conf.get('broker_timeout', 60)) + replica_count = self.ring.replica_count + quorum = quorum_size(replica_count) + self.shard_replication_quorum = config_auto_int_value( + conf.get('shard_replication_quorum'), quorum) + if self.shard_replication_quorum > replica_count: + self.logger.warning( + 'shard_replication_quorum of %s exceeds replica count %s' + ', reducing to %s', self.shard_replication_quorum, + replica_count, replica_count) + self.shard_replication_quorum = replica_count + self.existing_shard_replication_quorum = config_auto_int_value( + conf.get('existing_shard_replication_quorum'), + self.shard_replication_quorum) + if self.existing_shard_replication_quorum > replica_count: + self.logger.warning( + 'existing_shard_replication_quorum of %s exceeds replica count' + ' %s, reducing to %s', self.existing_shard_replication_quorum, + replica_count, replica_count) + self.existing_shard_replication_quorum = replica_count + + # internal client + self.conn_timeout = 
float(conf.get('conn_timeout', 5)) + request_tries = config_positive_int_value( + conf.get('request_tries', 3)) + internal_client_conf_path = conf.get('internal_client_conf_path', + '/etc/swift/internal-client.conf') + try: + self.int_client = internal_client.InternalClient( + internal_client_conf_path, + 'Swift Container Sharder', + request_tries, + allow_modify_pipeline=False) + except IOError as err: + if err.errno != errno.ENOENT: + raise + raise SystemExit( + 'Unable to load internal client from config: %r (%s)' % + (internal_client_conf_path, err)) + self.reported = 0 + + def _zero_stats(self): + """Zero out the stats.""" + super(ContainerSharder, self)._zero_stats() + # all sharding stats that are additional to the inherited replicator + # stats are maintained under the 'sharding' key in self.stats + self.stats['sharding'] = defaultdict(lambda: defaultdict(int)) + self.sharding_candidates = [] + + def _append_stat(self, category, key, value): + if not self.stats['sharding'][category][key]: + self.stats['sharding'][category][key] = list() + self.stats['sharding'][category][key].append(value) + + def _min_stat(self, category, key, value): + current = self.stats['sharding'][category][key] + if not current: + self.stats['sharding'][category][key] = value + else: + self.stats['sharding'][category][key] = min(current, value) + + def _max_stat(self, category, key, value): + current = self.stats['sharding'][category][key] + if not current: + self.stats['sharding'][category][key] = value + else: + self.stats['sharding'][category][key] = max(current, value) + + def _increment_stat(self, category, key, step=1, statsd=False): + self.stats['sharding'][category][key] += step + if statsd: + statsd_key = '%s_%s' % (category, key) + self.logger.increment(statsd_key) + + def _make_stats_info(self, broker, node, own_shard_range): + try: + file_size = os.stat(broker.db_file).st_size + except OSError: + file_size = None + + return {'path': broker.db_file, + 'node_index': node.get('index'), + 'account': broker.account, + 'container': broker.container, + 'root': broker.root_path, + 'object_count': own_shard_range.object_count, + 'meta_timestamp': own_shard_range.meta_timestamp.internal, + 'file_size': file_size} + + def _identify_sharding_candidate(self, broker, node): + own_shard_range = broker.get_own_shard_range() + if is_sharding_candidate( + own_shard_range, self.shard_container_threshold): + self.sharding_candidates.append( + self._make_stats_info(broker, node, own_shard_range)) + + def _transform_sharding_candidate_stats(self): + category = self.stats['sharding']['sharding_candidates'] + candidates = self.sharding_candidates + category['found'] = len(candidates) + candidates.sort(key=lambda c: c['object_count'], reverse=True) + if self.recon_candidates_limit >= 0: + category['top'] = candidates[:self.recon_candidates_limit] + else: + category['top'] = candidates + + def _record_sharding_progress(self, broker, node, error): + own_shard_range = broker.get_own_shard_range() + if (broker.get_db_state() in (UNSHARDED, SHARDING) and + own_shard_range.state in (ShardRange.SHARDING, + ShardRange.SHARDED)): + info = self._make_stats_info(broker, node, own_shard_range) + info['state'] = own_shard_range.state_text + info['db_state'] = broker.get_db_state() + states = [ShardRange.FOUND, ShardRange.CREATED, + ShardRange.CLEAVED, ShardRange.ACTIVE] + shard_ranges = broker.get_shard_ranges(states=states) + state_count = {} + for state in states: + state_count[ShardRange.STATES[state]] = 0 + for shard_range in 
shard_ranges: + state_count[shard_range.state_text] += 1 + info.update(state_count) + info['error'] = error and str(error) + self._append_stat('sharding_in_progress', 'all', info) + + def _report_stats(self): + # report accumulated stats since start of one sharder cycle + default_stats = ('attempted', 'success', 'failure') + category_keys = ( + ('visited', default_stats + ('skipped', 'completed')), + ('scanned', default_stats + ('found', 'min_time', 'max_time')), + ('created', default_stats), + ('cleaved', default_stats + ('min_time', 'max_time',)), + ('misplaced', default_stats + ('found', 'placed', 'unplaced')), + ('audit_root', default_stats), + ('audit_shard', default_stats), + ) + + now = time.time() + last_report = time.ctime(self.stats['start']) + elapsed = now - self.stats['start'] + sharding_stats = self.stats['sharding'] + for category, keys in category_keys: + stats = sharding_stats[category] + msg = ' '.join(['%s:%s' % (k, str(stats[k])) for k in keys]) + self.logger.info('Since %s %s - %s', last_report, category, msg) + + self._transform_sharding_candidate_stats() + + dump_recon_cache( + {'sharding_stats': self.stats, + 'sharding_time': elapsed, + 'sharding_last': now}, + self.rcache, self.logger) + self.reported = now + + def _periodic_report_stats(self): + if (time.time() - self.reported) >= 3600: # once an hour + self._report_stats() + + def _check_node(self, node): + if not node: + return False + if not is_local_device(self.ips, self.port, + node['replication_ip'], + node['replication_port']): + return False + if not check_drive(self.root, node['device'], + self.mount_check): + self.logger.warning( + 'Skipping %(device)s as it is not mounted' % node) + return False + return True + + def _fetch_shard_ranges(self, broker, newest=False, params=None, + include_deleted=False): + path = self.int_client.make_path(broker.root_account, + broker.root_container) + params = params or {} + params.setdefault('format', 'json') + headers = {'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'true', + 'X-Backend-Include-Deleted': str(include_deleted)} + if newest: + headers['X-Newest'] = 'true' + try: + try: + resp = self.int_client.make_request( + 'GET', path, headers, acceptable_statuses=(2,), + params=params) + except internal_client.UnexpectedResponse as err: + self.logger.warning("Failed to get shard ranges from %s: %s", + broker.root_path, err) + return None + record_type = resp.headers.get('x-backend-record-type') + if record_type != 'shard': + err = 'unexpected record type %r' % record_type + self.logger.error("Failed to get shard ranges from %s: %s", + broker.root_path, err) + return None + + try: + data = json.loads(resp.body) + if not isinstance(data, list): + raise ValueError('not a list') + return [ShardRange.from_dict(shard_range) + for shard_range in data] + except (ValueError, TypeError, KeyError) as err: + self.logger.error( + "Failed to get shard ranges from %s: invalid data: %r", + broker.root_path, err) + return None + finally: + self.logger.txn_id = None + + def _put_container(self, node, part, account, container, headers, body): + try: + direct_put_container(node, part, account, container, + conn_timeout=self.conn_timeout, + response_timeout=self.node_timeout, + headers=headers, contents=body) + except DirectClientException as err: + self.logger.warning( + 'Failed to put shard ranges to %s:%s/%s: %s', + node['ip'], node['port'], node['device'], err.http_status) + except (Exception, Timeout) as err: + self.logger.exception( + 'Failed to put shard ranges 
to %s:%s/%s: %s', + node['ip'], node['port'], node['device'], err) + else: + return True + return False + + def _send_shard_ranges(self, account, container, shard_ranges, + headers=None): + body = json.dumps([dict(sr) for sr in shard_ranges]) + part, nodes = self.ring.get_nodes(account, container) + headers = headers or {} + headers.update({'X-Backend-Record-Type': RECORD_TYPE_SHARD, + 'User-Agent': 'container-sharder %s' % os.getpid(), + 'X-Timestamp': Timestamp.now().normal, + 'Content-Length': len(body), + 'Content-Type': 'application/json'}) + + pool = GreenAsyncPile(len(nodes)) + for node in nodes: + pool.spawn(self._put_container, node, part, account, + container, headers, body) + + results = pool.waitall(None) + return results.count(True) >= quorum_size(self.ring.replica_count) + + def _get_shard_broker(self, shard_range, root_path, policy_index): + """ + Get a broker for a container db for the given shard range. If one of + the shard container's primary nodes is a local device then that will be + chosen for the db, otherwise the first of the shard container's handoff + nodes that is local will be chosen. + + :param shard_range: a :class:`~swift.common.utils.ShardRange` + :param root_path: the path of the shard's root container + :param policy_index: the storage policy index + :returns: a tuple of ``(part, broker, node_id)`` where ``part`` is the + shard container's partition, ``broker`` is an instance of + :class:`~swift.container.backend.ContainerBroker`, + ``node_id`` is the id of the selected node. + """ + part = self.ring.get_part(shard_range.account, shard_range.container) + node = self.find_local_handoff_for_part(part) + if not node: + raise DeviceUnavailable( + 'No mounted devices found suitable for creating shard broker' + 'for %s in partition %s' % (shard_range.name, part)) + + shard_broker = ContainerBroker.create_broker( + os.path.join(self.root, node['device']), part, shard_range.account, + shard_range.container, epoch=shard_range.epoch, + storage_policy_index=policy_index) + + # Get the valid info into the broker.container, etc + shard_broker.get_info() + shard_broker.merge_shard_ranges(shard_range) + shard_broker.set_sharding_sysmeta('Root', root_path) + shard_broker.update_metadata({ + 'X-Container-Sysmeta-Sharding': + ('True', Timestamp.now().internal)}) + + return part, shard_broker, node['id'] + + def _audit_root_container(self, broker): + # This is the root container, and therefore the tome of knowledge, + # all we can do is check there is nothing screwy with the ranges + self._increment_stat('audit_root', 'attempted') + warnings = [] + own_shard_range = broker.get_own_shard_range() + + if own_shard_range.state in (ShardRange.SHARDING, ShardRange.SHARDED): + shard_ranges = broker.get_shard_ranges() + missing_ranges = find_missing_ranges(shard_ranges) + if missing_ranges: + warnings.append( + 'missing range(s): %s' % + ' '.join(['%s-%s' % (lower, upper) + for lower, upper in missing_ranges])) + + for state in ShardRange.STATES: + shard_ranges = broker.get_shard_ranges(states=state) + overlaps = find_overlapping_ranges(shard_ranges) + for overlapping_ranges in overlaps: + warnings.append( + 'overlapping ranges in state %s: %s' % + (ShardRange.STATES[state], + ' '.join(['%s-%s' % (sr.lower, sr.upper) + for sr in overlapping_ranges]))) + + if warnings: + self.logger.warning( + 'Audit failed for root %s (%s): %s' % + (broker.db_file, broker.path, ', '.join(warnings))) + self._increment_stat('audit_root', 'failure', statsd=True) + return False + + 
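The root audit above looks for two problems in the root's set of shard ranges: gaps in the namespace and overlapping ranges in the same state. As a rough standalone illustration of what such checks examine (simplified stand-ins, not the actual find_missing_ranges/find_overlapping_ranges helpers in swift.container.sharder), with ranges reduced to (lower, upper) bounds:

# Illustrative sketch only: ranges are (lower, upper) tuples, each covering
# the namespace (lower, upper]; MIN/MAX are sketch-local sentinels.
MIN = ''
MAX = '\xff' * 8

def find_gaps(ranges):
    """Return (lower, upper) pairs for parts of (MIN, MAX] not covered."""
    gaps = []
    cursor = MIN
    for lower, upper in sorted(ranges):
        if lower > cursor:
            gaps.append((cursor, lower))
        cursor = max(cursor, upper)
    if cursor < MAX:
        gaps.append((cursor, MAX))
    return gaps

def find_overlaps(ranges):
    """Return pairs of adjacent ranges whose namespaces intersect."""
    overlaps = []
    ranges = sorted(ranges)
    for (l1, u1), (l2, u2) in zip(ranges, ranges[1:]):
        if l2 < u1:  # next range starts before the previous one ends
            overlaps.append(((l1, u1), (l2, u2)))
    return overlaps

if __name__ == '__main__':
    ranges = [('', 'cat'), ('cat', 'giraffe'), ('hippo', MAX)]
    print(find_gaps(ranges))      # [('giraffe', 'hippo')]
    print(find_overlaps(ranges))  # []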
self._increment_stat('audit_root', 'success', statsd=True) + return True + + def _audit_shard_container(self, broker): + # Get the root view of the world. + self._increment_stat('audit_shard', 'attempted') + warnings = [] + errors = [] + if not broker.account.startswith(self.shards_account_prefix): + warnings.append('account not in shards namespace %r' % + self.shards_account_prefix) + + own_shard_range = broker.get_own_shard_range(no_default=True) + + shard_range = None + if own_shard_range: + shard_ranges = self._fetch_shard_ranges( + broker, newest=True, + params={'marker': own_shard_range.lower, + 'end_marker': own_shard_range.upper}, + include_deleted=True) + if shard_ranges: + for shard_range in shard_ranges: + if (shard_range.lower == own_shard_range.lower and + shard_range.upper == own_shard_range.upper and + shard_range.name == own_shard_range.name): + break + else: + # this is not necessarily an error - some replicas of the + # root may not yet know about this shard container + warnings.append('root has no matching shard range') + shard_range = None + else: + warnings.append('unable to get shard ranges from root') + else: + errors.append('missing own shard range') + + if warnings: + self.logger.warning( + 'Audit warnings for shard %s (%s): %s' % + (broker.db_file, broker.path, ', '.join(warnings))) + + if errors: + self.logger.warning( + 'Audit failed for shard %s (%s) - skipping: %s' % + (broker.db_file, broker.path, ', '.join(errors))) + self._increment_stat('audit_shard', 'failure', statsd=True) + return False + + if shard_range: + self.logger.debug('Updating shard from root %s', dict(shard_range)) + broker.merge_shard_ranges(shard_range) + own_shard_range = broker.get_own_shard_range() + delete_age = time.time() - self.reclaim_age + if (own_shard_range.state == ShardRange.SHARDED and + own_shard_range.deleted and + own_shard_range.timestamp < delete_age and + broker.empty()): + broker.delete_db(Timestamp.now().internal) + self.logger.debug('Deleted shard container %s (%s)', + broker.db_file, broker.path) + self._increment_stat('audit_shard', 'success', statsd=True) + return True + + def _audit_container(self, broker): + if broker.is_deleted(): + # if the container has been marked as deleted, all metadata will + # have been erased so no point auditing. But we want it to pass, in + # case any objects exist inside it. + return True + if broker.is_root_container(): + return self._audit_root_container(broker) + return self._audit_shard_container(broker) + + def yield_objects(self, broker, src_shard_range, since_row=None): + """ + Iterates through all objects in ``src_shard_range`` in name order + yielding them in lists of up to CONTAINER_LISTING_LIMIT length. + + :param broker: A :class:`~swift.container.backend.ContainerBroker`. + :param src_shard_range: A :class:`~swift.common.utils.ShardRange` + describing the source range. + :param since_row: include only items whose ROWID is greater than + the given row id; by default all rows are included. 
+ :return: a generator of tuples of (list of objects, broker info dict) + """ + for include_deleted in (False, True): + marker = src_shard_range.lower_str + while True: + info = broker.get_info() + info['max_row'] = broker.get_max_row() + start = time.time() + objects = broker.get_objects( + self.cleave_row_batch_size, + marker=marker, + end_marker=src_shard_range.end_marker, + include_deleted=include_deleted, + since_row=since_row) + if objects: + self.logger.debug('got %s objects from %s in %ss', + len(objects), broker.db_file, + time.time() - start) + yield objects, info + + if len(objects) < self.cleave_row_batch_size: + break + marker = objects[-1]['name'] + + def yield_objects_to_shard_range(self, broker, src_shard_range, + dest_shard_ranges): + """ + Iterates through all objects in ``src_shard_range`` to place them in + destination shard ranges provided by the ``next_shard_range`` function. + Yields tuples of (object list, destination shard range in which those + objects belong). Note that the same destination shard range may be + referenced in more than one yielded tuple. + + :param broker: A :class:`~swift.container.backend.ContainerBroker`. + :param src_shard_range: A :class:`~swift.common.utils.ShardRange` + describing the source range. + :param dest_shard_ranges: A function which should return a list of + destination shard ranges in name order. + :return: a generator of tuples of + (object list, shard range, broker info dict) + """ + dest_shard_range_iter = dest_shard_range = None + for objs, info in self.yield_objects(broker, src_shard_range): + if not objs: + return + + def next_or_none(it): + try: + return next(it) + except StopIteration: + return None + + if dest_shard_range_iter is None: + dest_shard_range_iter = iter(dest_shard_ranges()) + dest_shard_range = next_or_none(dest_shard_range_iter) + + unplaced = False + last_index = next_index = 0 + for obj in objs: + if dest_shard_range is None: + # no more destinations: yield remainder of batch and return + # NB there may be more batches of objects but none of them + # will be placed so no point fetching them + yield objs[last_index:], None, info + return + if obj['name'] <= dest_shard_range.lower: + unplaced = True + elif unplaced: + # end of run of unplaced objects, yield them + yield objs[last_index:next_index], None, info + last_index = next_index + unplaced = False + while (dest_shard_range and + obj['name'] > dest_shard_range.upper): + if next_index != last_index: + # yield the objects in current dest_shard_range + yield (objs[last_index:next_index], + dest_shard_range, + info) + last_index = next_index + dest_shard_range = next_or_none(dest_shard_range_iter) + next_index += 1 + + if next_index != last_index: + # yield tail of current batch of objects + # NB there may be more objects for the current + # dest_shard_range in the next batch from yield_objects + yield (objs[last_index:next_index], + None if unplaced else dest_shard_range, + info) + + def _post_replicate_hook(self, broker, info, responses): + # override superclass behaviour + pass + + def _replicate_and_delete(self, broker, dest_shard_range, part, + dest_broker, node_id, info): + success, responses = self._replicate_object( + part, dest_broker.db_file, node_id) + quorum = quorum_size(self.ring.replica_count) + if not success and responses.count(True) < quorum: + self.logger.warning( + 'Failed to sufficiently replicate misplaced objects: %s in %s ' + '(not removing)', dest_shard_range, broker.path) + return False + + if broker.get_info()['id'] != 
info['id']: + # the db changed - don't remove any objects + success = False + else: + # remove objects up to the max row of the db sampled prior to + # the first object yielded for this destination; objects added + # after that point may not have been yielded and replicated so + # it is not safe to remove them yet + broker.remove_objects( + dest_shard_range.lower_str, + dest_shard_range.upper_str, + max_row=info['max_row']) + success = True + + if not success: + self.logger.warning( + 'Refused to remove misplaced objects: %s in %s', + dest_shard_range, broker.path) + return success + + def _move_objects(self, src_broker, src_shard_range, policy_index, + shard_range_fetcher): + # move objects from src_shard_range in src_broker to destination shard + # ranges provided by shard_range_fetcher + dest_brokers = {} # map shard range -> broker + placed = unplaced = 0 + success = True + for objs, dest_shard_range, info in self.yield_objects_to_shard_range( + src_broker, src_shard_range, shard_range_fetcher): + if not dest_shard_range: + unplaced += len(objs) + success = False + continue + + if dest_shard_range.name == src_broker.path: + self.logger.debug( + 'Skipping source as misplaced objects destination') + # in shrinking context, the misplaced objects might actually be + # correctly placed if the root has expanded this shard but this + # broker has not yet been updated + continue + + if dest_shard_range not in dest_brokers: + part, dest_broker, node_id = self._get_shard_broker( + dest_shard_range, src_broker.root_path, policy_index) + # save the broker info that was sampled prior to the *first* + # yielded objects for this destination + destination = {'part': part, + 'dest_broker': dest_broker, + 'node_id': node_id, + 'info': info} + dest_brokers[dest_shard_range] = destination + else: + destination = dest_brokers[dest_shard_range] + destination['dest_broker'].merge_items(objs) + placed += len(objs) + + if unplaced: + self.logger.warning( + 'Failed to find destination for at least %s misplaced objects ' + 'in %s' % (unplaced, src_broker.path)) + + # TODO: consider executing the replication jobs concurrently + for dest_shard_range, dest_args in dest_brokers.items(): + self.logger.debug('moving misplaced objects found in range %s' % + dest_shard_range) + success &= self._replicate_and_delete( + src_broker, dest_shard_range, **dest_args) + + self._increment_stat('misplaced', 'placed', step=placed) + self._increment_stat('misplaced', 'unplaced', step=unplaced) + return success, placed + unplaced + + def _make_shard_range_fetcher(self, broker, src_shard_range): + # returns a function that will lazy load shard ranges on demand; + # this means only one lookup is made for all misplaced ranges. + outer = {} + + def shard_range_fetcher(): + if not outer: + if broker.is_root_container(): + ranges = broker.get_shard_ranges( + marker=src_shard_range.lower_str, + end_marker=src_shard_range.end_marker, + states=SHARD_UPDATE_STATES) + else: + # TODO: the root may not yet know about shard ranges to + # which a shard is sharding, but those could come from + # the broker + ranges = self._fetch_shard_ranges( + broker, newest=True, + params={'states': 'updating', + 'marker': src_shard_range.lower_str, + 'end_marker': src_shard_range.end_marker}) + outer['ranges'] = iter(ranges) + return outer['ranges'] + return shard_range_fetcher + + def _make_default_misplaced_object_bounds(self, broker): + # Objects outside of this container's own range are misplaced. 
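The comment above is the whole rule for the default case: anything outside the container's own (lower, upper] namespace is misplaced. Before the method body that follows, here is a minimal standalone sketch of that bounds computation using plain tuples in place of ShardRange (names here are illustrative only):

# Illustrative sketch: '' stands for an unbounded side, as in the method
# below; each returned pair is a source namespace for the misplaced sweep.
def default_misplaced_bounds(lower, upper):
    bounds = []
    if lower:
        bounds.append(('', lower))    # names at or below our lower bound
    if upper:
        bounds.append((upper, ''))    # names above our upper bound
    return bounds

# a shard owning ('cat', 'giraffe'] treats ('', 'cat'] and ('giraffe', '']
# as misplaced-object source ranges:
assert default_misplaced_bounds('cat', 'giraffe') == [('', 'cat'),
                                                      ('giraffe', '')]
# the root owns the whole namespace, so nothing is misplaced by default:
assert default_misplaced_bounds('', '') == []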
+ own_shard_range = broker.get_own_shard_range() + bounds = [] + if own_shard_range.lower: + bounds.append(('', own_shard_range.lower)) + if own_shard_range.upper: + bounds.append((own_shard_range.upper, '')) + return bounds + + def _make_misplaced_object_bounds(self, broker): + bounds = [] + state = broker.get_db_state() + if state == SHARDED: + # Anything in the object table is treated as a misplaced object. + bounds.append(('', '')) + + if not bounds and state == SHARDING: + # Objects outside of this container's own range are misplaced. + # Objects in already cleaved shard ranges are also misplaced. + cleave_context = CleavingContext.load(broker) + if cleave_context.cursor: + bounds.append(('', cleave_context.cursor)) + own_shard_range = broker.get_own_shard_range() + if own_shard_range.upper: + bounds.append((own_shard_range.upper, '')) + + return bounds or self._make_default_misplaced_object_bounds(broker) + + def _move_misplaced_objects(self, broker, src_broker=None, + src_bounds=None): + """ + Search for objects in the given broker that do not belong in that + broker's namespace and move those objects to their correct shard + container. + + :param broker: An instance of :class:`swift.container.ContainerBroker`. + :param src_broker: optional alternative broker to use as the source + of misplaced objects; if not specified then ``broker`` is used as + the source. + :param src_bounds: optional list of (lower, upper) namespace bounds to + use when searching for misplaced objects + :return: True if all misplaced objects were sufficiently replicated to + their correct shard containers, False otherwise + """ + self.logger.debug('Looking for misplaced objects in %s (%s)', + broker.path.decode('utf-8'), broker.db_file) + self._increment_stat('misplaced', 'attempted') + src_broker = src_broker or broker + if src_bounds is None: + src_bounds = self._make_misplaced_object_bounds(broker) + # (ab)use ShardRange instances to encapsulate source namespaces + src_ranges = [ShardRange('dont/care', Timestamp.now(), lower, upper) + for lower, upper in src_bounds] + self.logger.debug('misplaced object source bounds %s' % src_bounds) + policy_index = broker.storage_policy_index + success = True + num_found = 0 + for src_shard_range in src_ranges: + part_success, part_num_found = self._move_objects( + src_broker, src_shard_range, policy_index, + self._make_shard_range_fetcher(broker, src_shard_range)) + success &= part_success + num_found += part_num_found + + if num_found: + self._increment_stat('misplaced', 'found', statsd=True) + self.logger.debug('Moved %s misplaced objects' % num_found) + self._increment_stat('misplaced', 'success' if success else 'failure') + self.logger.debug('Finished handling misplaced objects') + return success + + def _find_shard_ranges(self, broker): + """ + Scans the container to find shard ranges and adds them to the shard + ranges table. If there are existing shard ranges then scanning starts + from the upper bound of the uppermost existing shard range. + + :param broker: An instance of :class:`swift.container.ContainerBroker` + :return: a tuple of (success, num of shard ranges found) where success + is True if the last shard range has been found, False otherwise. 
+ """ + own_shard_range = broker.get_own_shard_range() + shard_ranges = broker.get_shard_ranges() + if shard_ranges and shard_ranges[-1].upper >= own_shard_range.upper: + self.logger.debug('Scan already completed for %s', broker.path) + return 0 + + self.logger.info('Starting scan for shard ranges on %s', broker.path) + self._increment_stat('scanned', 'attempted') + + start = time.time() + shard_data, last_found = broker.find_shard_ranges( + self.split_size, limit=self.scanner_batch_size, + existing_ranges=shard_ranges) + elapsed = time.time() - start + + if not shard_data: + if last_found: + self.logger.info("Already found all shard ranges") + self._increment_stat('scanned', 'success', statsd=True) + else: + # we didn't find anything + self.logger.warning("No shard ranges found") + self._increment_stat('scanned', 'failure', statsd=True) + return 0 + + shard_ranges = make_shard_ranges( + broker, shard_data, self.shards_account_prefix) + broker.merge_shard_ranges(shard_ranges) + num_found = len(shard_ranges) + self.logger.info( + "Completed scan for shard ranges: %d found", num_found) + self._increment_stat('scanned', 'found', step=num_found) + self._min_stat('scanned', 'min_time', round(elapsed / num_found, 3)) + self._max_stat('scanned', 'max_time', round(elapsed / num_found, 3)) + + if last_found: + self.logger.info("Final shard range reached.") + self._increment_stat('scanned', 'success', statsd=True) + return num_found + + def _create_shard_containers(self, broker): + # Create shard containers that are ready to receive redirected object + # updates. Do this now, so that redirection can begin immediately + # without waiting for cleaving to complete. + found_ranges = broker.get_shard_ranges(states=ShardRange.FOUND) + created_ranges = [] + for shard_range in found_ranges: + self._increment_stat('created', 'attempted') + shard_range.update_state(ShardRange.CREATED) + headers = { + 'X-Backend-Storage-Policy-Index': broker.storage_policy_index, + 'X-Container-Sysmeta-Shard-Root': broker.root_path, + 'X-Container-Sysmeta-Sharding': True} + success = self._send_shard_ranges( + shard_range.account, shard_range.container, + [shard_range], headers=headers) + if success: + self.logger.debug('PUT new shard range container for %s', + shard_range) + self._increment_stat('created', 'success', statsd=True) + else: + self.logger.error( + 'PUT of new shard container %r failed for %s.', + shard_range, broker.path) + self._increment_stat('created', 'failure', statsd=True) + # break, not continue, because elsewhere it is assumed that + # finding and cleaving shard ranges progresses linearly, so we + # do not want any subsequent shard ranges to be in created + # state while this one is still in found state + break + created_ranges.append(shard_range) + + if created_ranges: + broker.merge_shard_ranges(created_ranges) + if not broker.is_root_container(): + self._send_shard_ranges( + broker.root_account, broker.root_container, created_ranges) + self.logger.info( + "Completed creating shard range containers: %d created.", + len(created_ranges)) + return len(created_ranges) + + def _cleave_shard_range(self, broker, cleaving_context, shard_range): + self.logger.info("Cleaving '%s' from row %s into %s for %r", + broker.path, cleaving_context.last_cleave_to_row, + shard_range.name, shard_range) + self._increment_stat('cleaved', 'attempted') + start = time.time() + policy_index = broker.storage_policy_index + try: + shard_part, shard_broker, node_id = self._get_shard_broker( + shard_range, broker.root_path, 
policy_index) + except DeviceUnavailable as duex: + self.logger.warning(str(duex)) + self._increment_stat('cleaved', 'failure', statsd=True) + return False + + # only cleave from the retiring db - misplaced objects handler will + # deal with any objects in the fresh db + source_broker = broker.get_brokers()[0] + # if this range has been cleaved before but replication + # failed then the shard db may still exist and it may not be + # necessary to merge all the rows again + source_db_id = source_broker.get_info()['id'] + source_max_row = source_broker.get_max_row() + sync_point = shard_broker.get_sync(source_db_id) + if sync_point < source_max_row: + sync_from_row = max(cleaving_context.last_cleave_to_row, + sync_point) + for objects, info in self.yield_objects( + source_broker, shard_range, + since_row=sync_from_row): + shard_broker.merge_items(objects) + # Note: the max row stored as a sync point is sampled *before* + # objects are yielded to ensure that is less than or equal to + # the last yielded row. Other sync points are also copied from the + # source broker to the shards; if another replica of the source + # happens to subsequently cleave into a primary replica of the + # shard then it will only need to cleave rows after its last sync + # point with this replica of the source broker. + shard_broker.merge_syncs( + [{'sync_point': source_max_row, 'remote_id': source_db_id}] + + source_broker.get_syncs()) + else: + self.logger.debug("Cleaving '%s': %r - shard db already in sync", + broker.path, shard_range) + + own_shard_range = broker.get_own_shard_range() + + replication_quorum = self.existing_shard_replication_quorum + if shard_range.includes(own_shard_range): + # When shrinking, include deleted own (donor) shard range in + # the replicated db so that when acceptor next updates root it + # will atomically update its namespace *and* delete the donor. + # Don't do this when sharding a shard because the donor + # namespace should not be deleted until all shards are cleaved. + if own_shard_range.update_state(ShardRange.SHARDED): + own_shard_range.set_deleted() + broker.merge_shard_ranges(own_shard_range) + shard_broker.merge_shard_ranges(own_shard_range) + elif shard_range.state == ShardRange.CREATED: + # The shard range object stats may have changed since the shard + # range was found, so update with stats of objects actually + # copied to the shard broker. Only do this the first time each + # shard range is cleaved. 
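The sync-point handling above is what makes cleaving restartable: only rows later than the shard's recorded sync point for this source db (or later than the context's last_cleave_to_row) need to be merged again. A toy standalone model of that bookkeeping, with plain lists and dicts in place of ContainerBroker (everything here is hypothetical):

# Illustrative sketch of incremental cleaving with sync points. Rows are
# (rowid, name) tuples; ``sync_points`` maps source db id -> highest rowid
# already merged into the shard from that source.
def cleave_rows(source_id, source_rows, shard_rows, sync_points,
                last_cleave_to_row=0):
    source_max_row = max((rowid for rowid, _ in source_rows), default=0)
    sync_point = sync_points.get(source_id, 0)
    if sync_point >= source_max_row:
        return 'already in sync'
    # don't re-copy rows that an earlier pass already cleaved, even if the
    # shard's sync point for this source lags behind
    sync_from_row = max(last_cleave_to_row, sync_point)
    for rowid, name in source_rows:
        if rowid > sync_from_row:
            shard_rows.append((rowid, name))
    # record the source max row *sampled before copying* as the new sync
    # point so a later pass only considers newer rows
    sync_points[source_id] = source_max_row
    return 'cleaved'

shard, syncs = [], {}
print(cleave_rows('src-db', [(1, 'a'), (2, 'b')], shard, syncs))  # cleaved
print(cleave_rows('src-db', [(1, 'a'), (2, 'b')], shard, syncs))  # already in sync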
+ info = shard_broker.get_info() + shard_range.update_meta( + info['object_count'], info['bytes_used']) + shard_range.update_state(ShardRange.CLEAVED) + shard_broker.merge_shard_ranges(shard_range) + replication_quorum = self.shard_replication_quorum + + self.logger.info( + 'Replicating new shard container %s for %s', + shard_broker.path, shard_broker.get_own_shard_range()) + + success, responses = self._replicate_object( + shard_part, shard_broker.db_file, node_id) + + replication_successes = responses.count(True) + if (not success and (not responses or + replication_successes < replication_quorum)): + # insufficient replication or replication not even attempted; + # break because we don't want to progress the cleave cursor + # until each shard range has been successfully cleaved + self.logger.warning( + 'Failed to sufficiently replicate cleaved shard %s for %s: ' + '%s successes, %s required.', shard_range, broker.path, + replication_successes, replication_quorum) + self._increment_stat('cleaved', 'failure', statsd=True) + return False + + elapsed = round(time.time() - start, 3) + self._min_stat('cleaved', 'min_time', elapsed) + self._max_stat('cleaved', 'max_time', elapsed) + broker.merge_shard_ranges(shard_range) + cleaving_context.cursor = shard_range.upper_str + cleaving_context.ranges_done += 1 + cleaving_context.ranges_todo -= 1 + if shard_range.upper >= own_shard_range.upper: + # cleaving complete + cleaving_context.cleaving_done = True + cleaving_context.store(broker) + self.logger.info( + 'Cleaved %s for shard range %s in %gs.', + broker.path, shard_range, elapsed) + self._increment_stat('cleaved', 'success', statsd=True) + return True + + def _cleave(self, broker): + # Returns True if misplaced objects have been moved and the entire + # container namespace has been successfully cleaved, False otherwise + if broker.is_sharded(): + self.logger.debug('Passing over already sharded container %s/%s', + broker.account, broker.container) + return True + + cleaving_context = CleavingContext.load(broker) + if not cleaving_context.misplaced_done: + # ensure any misplaced objects in the source broker are moved; note + # that this invocation of _move_misplaced_objects is targetted at + # the *retiring* db. 
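The replication quorum test above decides whether a cleaved shard db reached enough nodes for the cleave cursor to advance. As a rough standalone sketch (the real quorum_size helper lives in swift.common.utils; this reimplementation is only for illustration), a container quorum is a simple majority of the replicas, and shard_replication_quorum may be tuned lower:

# Illustrative sketch of the "sufficiently replicated" test applied to the
# per-node success booleans returned by a replication attempt.
def majority_quorum(replica_count):
    # simple majority, e.g. 2 of 3, 3 of 5
    return replica_count // 2 + 1

def sufficiently_replicated(responses, replica_count, quorum=None):
    quorum = majority_quorum(replica_count) if quorum is None else quorum
    return responses.count(True) >= quorum

assert majority_quorum(3) == 2
assert sufficiently_replicated([True, True, False], 3)
assert not sufficiently_replicated([True, False, False], 3)
# a lower configured quorum lets the cleave cursor advance sooner,
# trading durability for speed
assert sufficiently_replicated([True, False, False], 3, quorum=1)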
+ self.logger.debug( + 'Moving any misplaced objects from sharding container: %s', + broker.path) + bounds = self._make_default_misplaced_object_bounds(broker) + cleaving_context.misplaced_done = self._move_misplaced_objects( + broker, src_broker=broker.get_brokers()[0], + src_bounds=bounds) + cleaving_context.store(broker) + + if cleaving_context.cleaving_done: + self.logger.debug('Cleaving already complete for container %s', + broker.path) + return cleaving_context.misplaced_done + + ranges_todo = broker.get_shard_ranges(marker=cleaving_context.marker) + if cleaving_context.cursor: + # always update ranges_todo in case more ranges have been found + # since last visit + cleaving_context.ranges_todo = len(ranges_todo) + self.logger.debug('Continuing to cleave (%s done, %s todo): %s', + cleaving_context.ranges_done, + cleaving_context.ranges_todo, + broker.path) + else: + cleaving_context.start() + cleaving_context.ranges_todo = len(ranges_todo) + self.logger.debug('Starting to cleave (%s todo): %s', + cleaving_context.ranges_todo, broker.path) + + ranges_done = [] + for shard_range in ranges_todo[:self.cleave_batch_size]: + if shard_range.state == ShardRange.FOUND: + break + elif shard_range.state in (ShardRange.CREATED, + ShardRange.CLEAVED, + ShardRange.ACTIVE): + if self._cleave_shard_range( + broker, cleaving_context, shard_range): + ranges_done.append(shard_range) + else: + break + else: + self.logger.warning('Unexpected shard range state for cleave', + shard_range.state) + break + + if not ranges_done: + cleaving_context.store(broker) + self.logger.debug( + 'Cleaved %s shard ranges for %s', len(ranges_done), broker.path) + return (cleaving_context.misplaced_done and + cleaving_context.cleaving_done) + + def _complete_sharding(self, broker): + cleaving_context = CleavingContext.load(broker) + if cleaving_context.done(): + # Move all CLEAVED shards to ACTIVE state and if a shard then + # delete own shard range; these changes will be simultaneously + # reported in the next update to the root container. 
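The CleavingContext cursor drives the batch loop above: shard ranges are cleaved in name order, a batch at a time, and the cursor records the upper bound of the last successfully cleaved range so a later pass resumes where this one stopped. A simplified standalone sketch of that progression (dict-based context, hypothetical names, not the sharder's API):

# Illustrative sketch of batched cleaving driven by a cursor. Shard ranges
# are (lower, upper) bounds in name order; ``cleave_one`` stands in for
# _cleave_shard_range and may fail, in which case the cursor must not move.
def cleave_batch(context, ranges_todo, batch_size, cleave_one):
    done = 0
    for lower, upper in ranges_todo[:batch_size]:
        if not cleave_one(lower, upper):
            break                    # don't advance past a failed range
        context['cursor'] = upper    # resume after this range next time
        context['ranges_done'] += 1
        done += 1
    return done

context = {'cursor': '', 'ranges_done': 0}
ranges = [('', 'cat'), ('cat', 'giraffe'), ('giraffe', '')]
cleave_batch(context, ranges, 2, lambda l, u: True)   # first pass: batch of 2
assert context == {'cursor': 'giraffe', 'ranges_done': 2}
# the next pass only considers ranges at or above the stored cursor
remaining = [r for r in ranges if r[0] >= context['cursor']]
assert remaining == [('giraffe', '')]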
+ modified_shard_ranges = broker.get_shard_ranges( + states=ShardRange.CLEAVED) + for sr in modified_shard_ranges: + sr.update_state(ShardRange.ACTIVE) + own_shard_range = broker.get_own_shard_range() + own_shard_range.update_state(ShardRange.SHARDED) + own_shard_range.update_meta(0, 0) + if (not broker.is_root_container() and not + own_shard_range.deleted): + own_shard_range = own_shard_range.copy( + timestamp=Timestamp.now(), deleted=1) + modified_shard_ranges.append(own_shard_range) + broker.merge_shard_ranges(modified_shard_ranges) + if broker.set_sharded_state(): + return True + else: + self.logger.warning( + 'Failed to remove retiring db file for %s', + broker.path) + else: + self.logger.warning( + 'Repeat cleaving required for %r with context: %s' + % (broker.db_files[0], dict(cleaving_context))) + cleaving_context.reset() + cleaving_context.store(broker) + + return False + + def _find_and_enable_sharding_candidates(self, broker, shard_ranges=None): + candidates = find_sharding_candidates( + broker, self.shard_container_threshold, shard_ranges) + if candidates: + self.logger.debug('Identified %s sharding candidates' + % len(candidates)) + broker.merge_shard_ranges(candidates) + + def _find_and_enable_shrinking_candidates(self, broker): + if not broker.is_sharded(): + self.logger.warning('Cannot shrink a not yet sharded container %s', + broker.path) + return + + merge_pairs = find_shrinking_candidates( + broker, self.shrink_size, self.merge_size) + self.logger.debug('Found %s shrinking candidates' % len(merge_pairs)) + own_shard_range = broker.get_own_shard_range() + for acceptor, donor in merge_pairs.items(): + self.logger.debug('shrinking shard range %s into %s in %s' % + (donor, acceptor, broker.db_file)) + broker.merge_shard_ranges([acceptor, donor]) + if acceptor.name != own_shard_range.name: + self._send_shard_ranges( + acceptor.account, acceptor.container, [acceptor]) + acceptor.increment_meta(donor.object_count, donor.bytes_used) + else: + # no need to change namespace or stats + acceptor.update_state(ShardRange.ACTIVE, + state_timestamp=Timestamp.now()) + # Now send a copy of the expanded acceptor, with an updated + # timestamp, to the donor container. This forces the donor to + # asynchronously cleave its entire contents to the acceptor and + # delete itself. The donor will pass its own deleted shard range to + # the acceptor when cleaving. Subsequent updates from the donor or + # the acceptor will then update the root to have the deleted donor + # shard range. + self._send_shard_ranges( + donor.account, donor.container, [donor, acceptor]) + + def _update_root_container(self, broker): + own_shard_range = broker.get_own_shard_range(no_default=True) + if not own_shard_range: + return + + # persist the reported shard metadata + broker.merge_shard_ranges(own_shard_range) + # now get a consistent list of own and other shard ranges + shard_ranges = broker.get_shard_ranges( + include_own=True, + include_deleted=True) + # send everything + self._send_shard_ranges( + broker.root_account, broker.root_container, + shard_ranges) + + def _process_broker(self, broker, node, part): + broker.get_info() # make sure account/container are populated + state = broker.get_db_state() + self.logger.debug('Starting processing %s state %s', + broker.path, state) + + if not self._audit_container(broker): + return + + # now look and deal with misplaced objects. + self._move_misplaced_objects(broker) + + if broker.is_deleted(): + # This container is deleted so we can skip it. 
We still want + # deleted containers to go via misplaced items because they may + # have new objects sitting in them that may need to move. + return + + is_leader = node['index'] == 0 and self.auto_shard + if state in (UNSHARDED, COLLAPSED): + if is_leader and broker.is_root_container(): + # bootstrap sharding of root container + self._find_and_enable_sharding_candidates( + broker, shard_ranges=[broker.get_own_shard_range()]) + + own_shard_range = broker.get_own_shard_range() + if own_shard_range.state in (ShardRange.SHARDING, + ShardRange.SHRINKING, + ShardRange.SHARDED): + if broker.get_shard_ranges(): + # container has been given shard ranges rather than + # found them e.g. via replication or a shrink event + if broker.set_sharding_state(): + state = SHARDING + elif is_leader: + if broker.set_sharding_state(): + state = SHARDING + else: + self.logger.debug( + 'Own shard range in state %r but no shard ranges ' + 'and not leader; remaining unsharded: %s' + % (own_shard_range.state_text, broker.path)) + + if state == SHARDING: + if is_leader: + num_found = self._find_shard_ranges(broker) + else: + num_found = 0 + + # create shard containers for newly found ranges + num_created = self._create_shard_containers(broker) + + if num_found or num_created: + # share updated shard range state with other nodes + self._replicate_object(part, broker.db_file, node['id']) + + # always try to cleave any pending shard ranges + cleave_complete = self._cleave(broker) + + if cleave_complete: + self.logger.info('Completed cleaving of %s', broker.path) + if self._complete_sharding(broker): + state = SHARDED + self._increment_stat('visited', 'completed', statsd=True) + else: + self.logger.debug('Remaining in sharding state %s', + broker.path) + + if state == SHARDED and broker.is_root_container(): + if is_leader: + self._find_and_enable_shrinking_candidates(broker) + self._find_and_enable_sharding_candidates(broker) + for shard_range in broker.get_shard_ranges( + states=[ShardRange.SHARDING]): + self._send_shard_ranges( + shard_range.account, shard_range.container, + [shard_range]) + + if not broker.is_root_container(): + # Update the root container with this container's shard range + # info; do this even when sharded in case previous attempts + # failed; don't do this if there is no own shard range. When + # sharding a shard, this is when the root will see the new + # shards move to ACTIVE state and the sharded shard + # simultaneously become deleted. + self._update_root_container(broker) + + self.logger.debug('Finished processing %s/%s state %s', + broker.account, broker.container, + broker.get_db_state()) + + def _one_shard_cycle(self, devices_to_shard, partitions_to_shard): + """ + The main function, everything the sharder does forks from this method. 
+ + The sharder loops through each container with sharding enabled and each + sharded container on the server, on each container it: + - audits the container + - checks and deals with misplaced items + - cleaves any shard ranges as required + - if not a root container, reports shard range stats to the root + container + """ + self.logger.info('Container sharder cycle starting, auto-sharding %s', + self.auto_shard) + if isinstance(devices_to_shard, (list, tuple)): + self.logger.info('(Override devices: %s)', + ', '.join(str(d) for d in devices_to_shard)) + if isinstance(partitions_to_shard, (list, tuple)): + self.logger.info('(Override partitions: %s)', + ', '.join(str(p) for p in partitions_to_shard)) + self._zero_stats() + self._local_device_ids = set() + dirs = [] + self.ips = whataremyips(bind_ip=self.bind_ip) + for node in self.ring.devs: + if not self._check_node(node): + continue + datadir = os.path.join(self.root, node['device'], self.datadir) + if os.path.isdir(datadir): + # Populate self._local_device_ids so we can find devices for + # shard containers later + self._local_device_ids.add(node['id']) + if node['device'] not in devices_to_shard: + continue + part_filt = self._partition_dir_filter( + node['id'], + partitions_to_shard) + dirs.append((datadir, node, part_filt)) + if not dirs: + self.logger.warning('Found no data dirs!') + for part, path, node in db_replicator.roundrobin_datadirs(dirs): + # NB: get_part_nodes always provides an 'index' key; + # this will be used in leader selection + for primary in self.ring.get_part_nodes(int(part)): + if node['id'] == primary['id']: + node = primary + break + else: + # Set index such that we'll *never* be selected as a leader + node['index'] = 'handoff' + + broker = ContainerBroker(path, logger=self.logger, + timeout=self.broker_timeout) + error = None + try: + self._identify_sharding_candidate(broker, node) + if sharding_enabled(broker): + self._increment_stat('visited', 'attempted') + self._process_broker(broker, node, part) + self._increment_stat('visited', 'success', statsd=True) + else: + self._increment_stat('visited', 'skipped') + except (Exception, Timeout) as error: + self._increment_stat('visited', 'failure', statsd=True) + self.logger.exception( + 'Unhandled exception while processing %s: %s', path, error) + try: + self._record_sharding_progress(broker, node, error) + except (Exception, Timeout) as error: + self.logger.exception( + 'Unhandled exception while dumping progress for %s: %s', + path, error) + self._periodic_report_stats() + + self._report_stats() + + def run_forever(self, *args, **kwargs): + """Run the container sharder until stopped.""" + self.reported = time.time() + time.sleep(random() * self.interval) + while True: + begin = time.time() + try: + self._one_shard_cycle(devices_to_shard=Everything(), + partitions_to_shard=Everything()) + except (Exception, Timeout): + self.logger.increment('errors') + self.logger.exception('Exception in sharder') + elapsed = time.time() - begin + self.logger.info( + 'Container sharder cycle completed: %.02fs', elapsed) + if elapsed < self.interval: + time.sleep(self.interval - elapsed) + + def run_once(self, *args, **kwargs): + """Run the container sharder once.""" + self.logger.info('Begin container sharder "once" mode') + override_options = parse_override_options(once=True, **kwargs) + devices_to_shard = override_options.devices or Everything() + partitions_to_shard = override_options.partitions or Everything() + begin = self.reported = time.time() + 
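In the cycle above, only the node holding ring index 0 for a partition acts as the "leader" that may initiate scanning when auto_shard is enabled; a node that is not a primary for the partition is given a non-numeric index so it can never be selected. A tiny standalone sketch of that selection rule, with plain dicts in place of ring nodes (names hypothetical):

# Illustrative sketch of per-partition leader selection: look the local node
# up among the partition's primaries to get its ring 'index'; non-primaries
# are marked as handoffs and can never lead.
def resolve_index(local_node, primaries):
    for primary in primaries:
        if local_node['id'] == primary['id']:
            return primary['index']
    return 'handoff'          # never equals 0, so never a leader

def is_leader(local_node, primaries, auto_shard=True):
    return auto_shard and resolve_index(local_node, primaries) == 0

primaries = [{'id': 'a', 'index': 0}, {'id': 'b', 'index': 1},
             {'id': 'c', 'index': 2}]
assert is_leader({'id': 'a'}, primaries)
assert not is_leader({'id': 'b'}, primaries)
assert not is_leader({'id': 'z'}, primaries)            # handoff node
assert not is_leader({'id': 'a'}, primaries, auto_shard=False)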
self._one_shard_cycle(devices_to_shard=devices_to_shard, + partitions_to_shard=partitions_to_shard) + elapsed = time.time() - begin + self.logger.info( + 'Container sharder "once" mode completed: %.02fs', elapsed) diff --git a/swift/obj/server.py b/swift/obj/server.py index 36bd758d3f..2f584bb319 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -35,7 +35,7 @@ from swift.common.utils import public, get_logger, \ normalize_delete_at_timestamp, get_log_line, Timestamp, \ get_expirer_container, parse_mime_headers, \ iter_multipart_mime_documents, extract_swift_bytes, safe_json_loads, \ - config_auto_int_value + config_auto_int_value, split_path, get_redirect_data from swift.common.bufferedhttp import http_connect from swift.common.constraints import check_object_creation, \ valid_timestamp, check_utf8 @@ -44,7 +44,7 @@ from swift.common.exceptions import ConnectionTimeout, DiskFileQuarantined, \ DiskFileDeviceUnavailable, DiskFileExpired, ChunkReadTimeout, \ ChunkReadError, DiskFileXattrNotSupported from swift.obj import ssync_receiver -from swift.common.http import is_success +from swift.common.http import is_success, HTTP_MOVED_PERMANENTLY from swift.common.base_storage_server import BaseStorageServer from swift.common.header_key_dict import HeaderKeyDict from swift.common.request_helpers import get_name_and_placement, \ @@ -245,7 +245,7 @@ class ObjectController(BaseStorageServer): def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy, - logger_thread_locals=None): + logger_thread_locals=None, container_path=None): """ Sends or saves an async update. @@ -263,11 +263,21 @@ class ObjectController(BaseStorageServer): :param logger_thread_locals: The thread local values to be set on the self.logger to retain transaction logging information. + :param container_path: optional path in the form `` + to which the update should be sent. If given this path will be used + instead of constructing a path from the ``account`` and + ``container`` params. 
""" if logger_thread_locals: self.logger.thread_locals = logger_thread_locals headers_out['user-agent'] = 'object-server %s' % os.getpid() - full_path = '/%s/%s/%s' % (account, container, obj) + if container_path: + # use explicitly specified container path + full_path = '/%s/%s' % (container_path, obj) + else: + full_path = '/%s/%s/%s' % (account, container, obj) + + redirect_data = None if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): @@ -277,15 +287,23 @@ class ObjectController(BaseStorageServer): with Timeout(self.node_timeout): response = conn.getresponse() response.read() - if is_success(response.status): - return - else: - self.logger.error(_( - 'ERROR Container update failed ' - '(saving for async update later): %(status)d ' - 'response from %(ip)s:%(port)s/%(dev)s'), - {'status': response.status, 'ip': ip, 'port': port, - 'dev': contdevice}) + if is_success(response.status): + return + + if response.status == HTTP_MOVED_PERMANENTLY: + try: + redirect_data = get_redirect_data(response) + except ValueError as err: + self.logger.error( + 'Container update failed for %r; problem with ' + 'redirect location: %s' % (obj, err)) + else: + self.logger.error(_( + 'ERROR Container update failed ' + '(saving for async update later): %(status)d ' + 'response from %(ip)s:%(port)s/%(dev)s'), + {'status': response.status, 'ip': ip, 'port': port, + 'dev': contdevice}) except (Exception, Timeout): self.logger.exception(_( 'ERROR container update failed with ' @@ -293,6 +311,13 @@ class ObjectController(BaseStorageServer): {'ip': ip, 'port': port, 'dev': contdevice}) data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out} + if redirect_data: + self.logger.debug( + 'Update to %(path)s redirected to %(redirect)s', + {'path': full_path, 'redirect': redirect_data[0]}) + container_path = redirect_data[0] + if container_path: + data['container_path'] = container_path timestamp = headers_out.get('x-meta-timestamp', headers_out.get('x-timestamp')) self._diskfile_router[policy].pickle_async_update( @@ -319,6 +344,7 @@ class ObjectController(BaseStorageServer): contdevices = [d.strip() for d in headers_in.get('X-Container-Device', '').split(',')] contpartition = headers_in.get('X-Container-Partition', '') + contpath = headers_in.get('X-Backend-Container-Path') if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, @@ -331,6 +357,21 @@ class ObjectController(BaseStorageServer): 'devices': headers_in.get('X-Container-Device', '')}) return + if contpath: + try: + # TODO: this is very late in request handling to be validating + # a header - if we did *not* check and the header was bad + # presumably the update would fail and we would fall back to an + # async update to the root container, which might be best + # course of action rather than aborting update altogether? + split_path('/' + contpath, minsegs=2, maxsegs=2) + except ValueError: + self.logger.error( + "Invalid X-Backend-Container-Path, should be of the form " + "'account/container' but got %r." 
% contpath) + # fall back to updating root container + contpath = None + if contpartition: updates = zip(conthosts, contdevices) else: @@ -344,7 +385,8 @@ class ObjectController(BaseStorageServer): gt = spawn(self.async_update, op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy, - logger_thread_locals=self.logger.thread_locals) + logger_thread_locals=self.logger.thread_locals, + container_path=contpath) update_greenthreads.append(gt) # Wait a little bit to see if the container updates are successful. # If we immediately return after firing off the greenthread above, then diff --git a/swift/obj/updater.py b/swift/obj/updater.py index df21c01d7b..febb754ce9 100644 --- a/swift/obj/updater.py +++ b/swift/obj/updater.py @@ -28,12 +28,14 @@ from swift.common.constraints import check_drive from swift.common.exceptions import ConnectionTimeout from swift.common.ring import Ring from swift.common.utils import get_logger, renamer, write_pickle, \ - dump_recon_cache, config_true_value, ratelimit_sleep, eventlet_monkey_patch + dump_recon_cache, config_true_value, ratelimit_sleep, split_path, \ + eventlet_monkey_patch, get_redirect_data from swift.common.daemon import Daemon from swift.common.header_key_dict import HeaderKeyDict from swift.common.storage_policy import split_policy_string, PolicyError from swift.obj.diskfile import get_tmp_dir, ASYNCDIR_BASE -from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR +from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR, \ + HTTP_MOVED_PERMANENTLY class SweepStats(object): @@ -41,12 +43,13 @@ class SweepStats(object): Stats bucket for an update sweep """ def __init__(self, errors=0, failures=0, quarantines=0, successes=0, - unlinks=0): + unlinks=0, redirects=0): self.errors = errors self.failures = failures self.quarantines = quarantines self.successes = successes self.unlinks = unlinks + self.redirects = redirects def copy(self): return type(self)(self.errors, self.failures, self.quarantines, @@ -57,7 +60,8 @@ class SweepStats(object): self.failures - other.failures, self.quarantines - other.quarantines, self.successes - other.successes, - self.unlinks - other.unlinks) + self.unlinks - other.unlinks, + self.redirects - other.redirects) def reset(self): self.errors = 0 @@ -65,6 +69,7 @@ class SweepStats(object): self.quarantines = 0 self.successes = 0 self.unlinks = 0 + self.redirects = 0 def __str__(self): keys = ( @@ -73,6 +78,7 @@ class SweepStats(object): (self.quarantines, 'quarantines'), (self.unlinks, 'unlinks'), (self.errors, 'errors'), + (self.redirects, 'redirects'), ) return ', '.join('%d %s' % pair for pair in keys) @@ -279,7 +285,8 @@ class ObjectUpdater(Daemon): 'in %(elapsed).02fs seconds:, ' '%(successes)d successes, %(failures)d failures, ' '%(quarantines)d quarantines, ' - '%(unlinks)d unlinks, %(errors)d errors ' + '%(unlinks)d unlinks, %(errors)d errors, ' + '%(redirects)d redirects ' '(pid: %(pid)d)'), {'device': device, 'elapsed': time.time() - start_time, @@ -288,7 +295,8 @@ class ObjectUpdater(Daemon): 'failures': sweep_totals.failures, 'quarantines': sweep_totals.quarantines, 'unlinks': sweep_totals.unlinks, - 'errors': sweep_totals.errors}) + 'errors': sweep_totals.errors, + 'redirects': sweep_totals.redirects}) def process_object_update(self, update_path, device, policy): """ @@ -309,44 +317,83 @@ class ObjectUpdater(Daemon): os.path.basename(update_path)) renamer(update_path, target_path, fsync=False) return - successes = update.get('successes', []) - 
part, nodes = self.get_container_ring().get_nodes( - update['account'], update['container']) - obj = '/%s/%s/%s' % \ - (update['account'], update['container'], update['obj']) - headers_out = HeaderKeyDict(update['headers']) - headers_out['user-agent'] = 'object-updater %s' % os.getpid() - headers_out.setdefault('X-Backend-Storage-Policy-Index', - str(int(policy))) - events = [spawn(self.object_update, - node, part, update['op'], obj, headers_out) - for node in nodes if node['id'] not in successes] - success = True - new_successes = False - for event in events: - event_success, node_id = event.wait() - if event_success is True: - successes.append(node_id) - new_successes = True + + def do_update(): + successes = update.get('successes', []) + headers_out = HeaderKeyDict(update['headers'].copy()) + headers_out['user-agent'] = 'object-updater %s' % os.getpid() + headers_out.setdefault('X-Backend-Storage-Policy-Index', + str(int(policy))) + headers_out.setdefault('X-Backend-Accept-Redirect', 'true') + container_path = update.get('container_path') + if container_path: + acct, cont = split_path('/' + container_path, minsegs=2) else: - success = False - if success: - self.stats.successes += 1 - self.logger.increment('successes') - self.logger.debug('Update sent for %(obj)s %(path)s', - {'obj': obj, 'path': update_path}) - self.stats.unlinks += 1 - self.logger.increment('unlinks') - os.unlink(update_path) - else: - self.stats.failures += 1 - self.logger.increment('failures') - self.logger.debug('Update failed for %(obj)s %(path)s', - {'obj': obj, 'path': update_path}) - if new_successes: - update['successes'] = successes - write_pickle(update, update_path, os.path.join( - device, get_tmp_dir(policy))) + acct, cont = update['account'], update['container'] + part, nodes = self.get_container_ring().get_nodes(acct, cont) + obj = '/%s/%s/%s' % (acct, cont, update['obj']) + events = [spawn(self.object_update, + node, part, update['op'], obj, headers_out) + for node in nodes if node['id'] not in successes] + success = True + new_successes = rewrite_pickle = False + redirect = None + redirects = set() + for event in events: + event_success, node_id, redirect = event.wait() + if event_success is True: + successes.append(node_id) + new_successes = True + else: + success = False + if redirect: + redirects.add(redirect) + + if success: + self.stats.successes += 1 + self.logger.increment('successes') + self.logger.debug('Update sent for %(obj)s %(path)s', + {'obj': obj, 'path': update_path}) + self.stats.unlinks += 1 + self.logger.increment('unlinks') + os.unlink(update_path) + elif redirects: + # erase any previous successes + update.pop('successes', None) + redirect = max(redirects, key=lambda x: x[-1])[0] + redirect_history = update.setdefault('redirect_history', []) + if redirect in redirect_history: + # force next update to be sent to root, reset history + update['container_path'] = None + update['redirect_history'] = [] + else: + update['container_path'] = redirect + redirect_history.append(redirect) + self.stats.redirects += 1 + self.logger.increment("redirects") + self.logger.debug( + 'Update redirected for %(obj)s %(path)s to %(shard)s', + {'obj': obj, 'path': update_path, + 'shard': update['container_path']}) + rewrite_pickle = True + else: + self.stats.failures += 1 + self.logger.increment('failures') + self.logger.debug('Update failed for %(obj)s %(path)s', + {'obj': obj, 'path': update_path}) + if new_successes: + update['successes'] = successes + rewrite_pickle = True + + return rewrite_pickle, 
redirect + + rewrite_pickle, redirect = do_update() + if redirect: + # make one immediate retry to the redirect location + rewrite_pickle, redirect = do_update() + if rewrite_pickle: + write_pickle(update, update_path, os.path.join( + device, get_tmp_dir(policy))) def object_update(self, node, part, op, obj, headers_out): """ @@ -357,7 +404,12 @@ class ObjectUpdater(Daemon): :param op: operation performed (ex: 'PUT' or 'DELETE') :param obj: object name being updated :param headers_out: headers to send with the update + :return: a tuple of (``success``, ``node_id``, ``redirect``) + where ``success`` is True if the update succeeded, ``node_id`` is + the_id of the node updated and ``redirect`` is either None or a + tuple of (a path, a timestamp string). """ + redirect = None try: with ConnectionTimeout(self.conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], @@ -365,15 +417,24 @@ class ObjectUpdater(Daemon): with Timeout(self.node_timeout): resp = conn.getresponse() resp.read() - success = is_success(resp.status) - if not success: - self.logger.debug( - _('Error code %(status)d is returned from remote ' - 'server %(ip)s: %(port)s / %(device)s'), - {'status': resp.status, 'ip': node['ip'], - 'port': node['port'], 'device': node['device']}) - return (success, node['id']) + + if resp.status == HTTP_MOVED_PERMANENTLY: + try: + redirect = get_redirect_data(resp) + except ValueError as err: + self.logger.error( + 'Container update failed for %r; problem with ' + 'redirect location: %s' % (obj, err)) + + success = is_success(resp.status) + if not success: + self.logger.debug( + _('Error code %(status)d is returned from remote ' + 'server %(ip)s: %(port)s / %(device)s'), + {'status': resp.status, 'ip': node['ip'], + 'port': node['port'], 'device': node['device']}) + return success, node['id'], redirect except (Exception, Timeout): self.logger.exception(_('ERROR with remote server ' '%(ip)s:%(port)s/%(device)s'), node) - return HTTP_INTERNAL_SERVER_ERROR, node['id'] + return HTTP_INTERNAL_SERVER_ERROR, node['id'], redirect diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py index df0ea71b89..4822b01729 100644 --- a/swift/proxy/controllers/base.py +++ b/swift/proxy/controllers/base.py @@ -28,6 +28,7 @@ from six.moves.urllib.parse import quote import os import time +import json import functools import inspect import itertools @@ -40,11 +41,11 @@ from eventlet import sleep from eventlet.timeout import Timeout import six -from swift.common.wsgi import make_pre_authed_env +from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request from swift.common.utils import Timestamp, config_true_value, \ public, split_path, list_from_csv, GreenthreadSafeIterator, \ GreenAsyncPile, quorum_size, parse_content_type, \ - document_iters_to_http_response_body + document_iters_to_http_response_body, ShardRange from swift.common.bufferedhttp import http_connect from swift.common import constraints from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \ @@ -188,6 +189,7 @@ def headers_to_container_info(headers, status_int=HTTP_OK): }, 'meta': meta, 'sysmeta': sysmeta, + 'sharding_state': headers.get('x-backend-sharding-state', 'unsharded'), } @@ -375,6 +377,9 @@ def get_container_info(env, app, swift_source=None): else: info[field] = int(info[field]) + if info.get('sharding_state') is None: + info['sharding_state'] = 'unsharded' + return info @@ -1994,3 +1999,91 @@ class Controller(object): else: raise ValueError( "server_type can only be 
'account' or 'container'") + + def _get_container_listing(self, req, account, container, headers=None, + params=None): + """ + Fetch container listing from given `account/container`. + + :param req: original Request instance. + :param account: account in which `container` is stored. + :param container: container from listing should be fetched. + :param headers: headers to be included with the request + :param params: query string parameters to be used. + :return: a tuple of (deserialized json data structure, swob Response) + """ + params = params or {} + version, _a, _c, _other = req.split_path(3, 4, True) + path = '/'.join(['', version, account, container]) + + subreq = make_pre_authed_request( + req.environ, method='GET', path=quote(path), headers=req.headers, + swift_source='SH') + if headers: + subreq.headers.update(headers) + subreq.params = params + self.app.logger.debug( + 'Get listing from %s %s' % (subreq.path_qs, headers)) + response = self.app.handle_request(subreq) + + if not is_success(response.status_int): + self.app.logger.warning( + 'Failed to get container listing from %s: %s', + subreq.path_qs, response.status_int) + return None, response + + try: + data = json.loads(response.body) + if not isinstance(data, list): + raise ValueError('not a list') + return data, response + except ValueError as err: + self.app.logger.error( + 'Problem with listing response from %s: %r', + subreq.path_qs, err) + return None, response + + def _get_shard_ranges(self, req, account, container, includes=None, + states=None): + """ + Fetch shard ranges from given `account/container`. If `includes` is + given then the shard range for that object name is requested, otherwise + all shard ranges are requested. + + :param req: original Request instance. + :param account: account from which shard ranges should be fetched. + :param container: container from which shard ranges should be fetched. + :param includes: (optional) restricts the list of fetched shard ranges + to those which include the given name. + :param states: (optional) the states of shard ranges to be fetched. + :return: a list of instances of :class:`swift.common.utils.ShardRange`, + or None if there was a problem fetching the shard ranges + """ + params = req.params.copy() + params.pop('limit', None) + params['format'] = 'json' + if includes: + params['includes'] = includes + if states: + params['states'] = states + headers = {'X-Backend-Record-Type': 'shard'} + listing, response = self._get_container_listing( + req, account, container, headers=headers, params=params) + if listing is None: + return None + + record_type = response.headers.get('x-backend-record-type') + if record_type != 'shard': + err = 'unexpected record type %r' % record_type + self.app.logger.error("Failed to get shard ranges from %s: %s", + req.path_qs, err) + return None + + try: + return [ShardRange.from_dict(shard_range) + for shard_range in listing] + except (ValueError, TypeError, KeyError) as err: + self.app.logger.error( + "Failed to get shard ranges from %s: invalid data: %r", + req.path_qs, err) + return None diff --git a/swift/proxy/controllers/container.py b/swift/proxy/controllers/container.py index 15c67858ea..e90632a294 100644 --- a/swift/proxy/controllers/container.py +++ b/swift/proxy/controllers/container.py @@ -14,11 +14,14 @@ # limitations under the License. 
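When the proxy fetches shard ranges as above it can pass includes=<object name> so that only the shard range covering that name is returned. A standalone sketch of that selection over already-deserialized (lower, upper] bounds (not the container server's actual query code; helpers here are illustrative):

# Illustrative sketch: each range covers the namespace (lower, upper];
# '' means an unbounded side. Ranges are contiguous and in name order.
def range_includes(lower, upper, name):
    return (not lower or name > lower) and (not upper or name <= upper)

def find_including(ranges, name):
    for lower, upper in ranges:
        if range_includes(lower, upper, name):
            return (lower, upper)
    return None

ranges = [('', 'cat'), ('cat', 'giraffe'), ('giraffe', '')]
assert find_including(ranges, 'aardvark') == ('', 'cat')
assert find_including(ranges, 'cat') == ('', 'cat')        # upper is inclusive
assert find_including(ranges, 'dog') == ('cat', 'giraffe')
assert find_including(ranges, 'zebra') == ('giraffe', '')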
from swift import gettext_ as _ +import json from six.moves.urllib.parse import unquote -from swift.common.utils import public, csv_append, Timestamp -from swift.common.constraints import check_metadata +from swift.common.utils import public, csv_append, Timestamp, \ + config_true_value, ShardRange +from swift.common.constraints import check_metadata, CONTAINER_LISTING_LIMIT from swift.common.http import HTTP_ACCEPTED, is_success +from swift.common.request_helpers import get_sys_meta_prefix from swift.proxy.controllers.base import Controller, delay_denial, \ cors_validation, set_info_cache, clear_info_cache from swift.common.storage_policy import POLICIES @@ -84,7 +87,9 @@ class ContainerController(Controller): def GETorHEAD(self, req): """Handler for HTTP GET/HEAD requests.""" ai = self.account_info(self.account_name, req) - if not ai[1]: + auto_account = self.account_name.startswith( + self.app.auto_create_account_prefix) + if not (auto_account or ai[1]): if 'swift.authorize' in req.environ: aresp = req.environ['swift.authorize'](req) if aresp: @@ -101,10 +106,20 @@ class ContainerController(Controller): node_iter = self.app.iter_nodes(self.app.container_ring, part) params = req.params params['format'] = 'json' + record_type = req.headers.get('X-Backend-Record-Type', '').lower() + if not record_type: + record_type = 'auto' + req.headers['X-Backend-Record-Type'] = 'auto' + params['states'] = 'listing' req.params = params resp = self.GETorHEAD_base( req, _('Container'), node_iter, part, req.swift_entity_path, concurrency) + resp_record_type = resp.headers.get('X-Backend-Record-Type', '') + if all((req.method == "GET", record_type == 'auto', + resp_record_type.lower() == 'shard')): + resp = self._get_from_shards(req, resp) + # Cache this. We just made a request to a storage node and got # up-to-date information for the container. resp.headers['X-Backend-Recheck-Container-Existence'] = str( @@ -122,6 +137,104 @@ class ContainerController(Controller): for key in self.app.swift_owner_headers: if key in resp.headers: del resp.headers[key] + # Expose sharding state in reseller requests + if req.environ.get('reseller_request', False): + resp.headers['X-Container-Sharding'] = config_true_value( + resp.headers.get(get_sys_meta_prefix('container') + 'Sharding', + 'False')) + return resp + + def _get_from_shards(self, req, resp): + # construct listing using shards described by the response body + shard_ranges = [ShardRange.from_dict(data) + for data in json.loads(resp.body)] + self.app.logger.debug('GET listing from %s shards for: %s', + len(shard_ranges), req.path_qs) + if not shard_ranges: + # can't find ranges or there was a problem getting the ranges. So + # return what we have. 
+ return resp + + objects = [] + req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT)) + params = req.params.copy() + params.pop('states', None) + req.headers.pop('X-Backend-Record-Type', None) + reverse = config_true_value(params.get('reverse')) + marker = params.get('marker') + end_marker = params.get('end_marker') + + limit = req_limit + for shard_range in shard_ranges: + params['limit'] = limit + # Always set marker to ensure that object names less than or equal + # to those already in the listing are not fetched + if objects: + last_name = objects[-1].get('name', + objects[-1].get('subdir', u'')) + params['marker'] = last_name.encode('utf-8') + elif reverse and marker and marker > shard_range.lower: + params['marker'] = marker + elif marker and marker <= shard_range.upper: + params['marker'] = marker + else: + params['marker'] = shard_range.upper_str if reverse \ + else shard_range.lower_str + if params['marker'] and reverse: + params['marker'] += '\x00' + + # Always set end_marker to ensure that misplaced objects beyond + # the expected shard range are not fetched + if end_marker and end_marker in shard_range: + params['end_marker'] = end_marker + else: + params['end_marker'] = shard_range.lower_str if reverse \ + else shard_range.upper_str + if params['end_marker'] and not reverse: + params['end_marker'] += '\x00' + + if (shard_range.account == self.account_name and + shard_range.container == self.container_name): + # directed back to same container - force GET of objects + headers = {'X-Backend-Record-Type': 'object'} + else: + headers = None + self.app.logger.debug('Getting from %s %s with %s', + shard_range, shard_range.name, headers) + objs, shard_resp = self._get_container_listing( + req, shard_range.account, shard_range.container, + headers=headers, params=params) + + if not objs: + # tolerate errors or empty shard containers + continue + + objects.extend(objs) + limit -= len(objs) + + if limit <= 0: + break + elif (end_marker and reverse and + end_marker >= objects[-1]['name'].encode('utf-8')): + break + elif (end_marker and not reverse and + end_marker <= objects[-1]['name'].encode('utf-8')): + break + + resp.body = json.dumps(objects) + constrained = any(req.params.get(constraint) for constraint in ( + 'marker', 'end_marker', 'path', 'prefix', 'delimiter')) + if not constrained and len(objects) < req_limit: + self.app.logger.debug('Setting object count to %s' % len(objects)) + # prefer the actual listing stats over the potentially outdated + # root stats. This condition is only likely when a sharded + # container is shrinking or in tests; typically a sharded container + # will have more than CONTAINER_LISTING_LIMIT objects so any + # unconstrained listing will be capped by the limit and total + # object stats cannot therefore be inferred from the listing. 
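The _get_from_shards() method above builds one client listing from several per-shard listings: it walks the shard ranges in namespace order, narrows marker/end_marker to each shard, and decrements the remaining limit as objects are collected. A dependency-free sketch of the forward (non-reverse) case, with shards modelled as (lower, upper, sorted names) tuples; the real code also advances the marker to the last name already listed and handles reverse listings, which this sketch omits:

    def merged_listing(shards, marker='', end_marker='', limit=5):
        # '' stands in for an unbounded upper end on the last shard
        result = []
        remaining = limit
        for lower, upper, names in shards:
            if remaining <= 0:
                break
            # narrow the sub-request to this shard's namespace
            shard_marker = max(marker, lower) if marker else lower
            if end_marker and (upper == '' or end_marker <= upper):
                shard_end = end_marker   # requested end falls inside this shard
            else:
                shard_end = None         # list this shard up to its upper bound
            chunk = [n for n in names
                     if n > shard_marker and (shard_end is None or n < shard_end)]
            chunk = chunk[:remaining]
            result.extend(chunk)
            remaining -= len(chunk)
            if shard_end is not None:
                break                    # nothing beyond end_marker is wanted
        return result

    shards = [('', 'd', ['a', 'b', 'c']), ('d', 'p', ['e', 'm']), ('p', '', ['q'])]
    assert merged_listing(shards) == ['a', 'b', 'c', 'e', 'm']          # limit=5
    assert merged_listing(shards, marker='b', end_marker='q') == ['c', 'e', 'm']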
+ resp.headers['X-Container-Object-Count'] = len(objects) + resp.headers['X-Container-Bytes-Used'] = sum( + [o['bytes'] for o in objects]) return resp @public @@ -150,6 +263,10 @@ class ContainerController(Controller): if not req.environ.get('swift_owner'): for key in self.app.swift_owner_headers: req.headers.pop(key, None) + if req.environ.get('reseller_request', False) and \ + 'X-Container-Sharding' in req.headers: + req.headers[get_sys_meta_prefix('container') + 'Sharding'] = \ + str(config_true_value(req.headers['X-Container-Sharding'])) length_limit = self.get_name_length_limit() if len(self.container_name) > length_limit: resp = HTTPBadRequest(request=req) @@ -198,6 +315,10 @@ class ContainerController(Controller): if not req.environ.get('swift_owner'): for key in self.app.swift_owner_headers: req.headers.pop(key, None) + if req.environ.get('reseller_request', False) and \ + 'X-Container-Sharding' in req.headers: + req.headers[get_sys_meta_prefix('container') + 'Sharding'] = \ + str(config_true_value(req.headers['X-Container-Sharding'])) account_partition, accounts, container_count = \ self.account_info(self.account_name, req) if not accounts: diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index d8aadf7935..7a41ef3c53 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -266,6 +266,20 @@ class BaseObjectController(Controller): """Handler for HTTP HEAD requests.""" return self.GETorHEAD(req) + def _get_update_target(self, req, container_info): + # find the sharded container to which we'll send the update + db_state = container_info.get('sharding_state', 'unsharded') + if db_state in ('sharded', 'sharding'): + shard_ranges = self._get_shard_ranges( + req, self.account_name, self.container_name, + includes=self.object_name, states='updating') + if shard_ranges: + partition, nodes = self.app.container_ring.get_nodes( + shard_ranges[0].account, shard_ranges[0].container) + return partition, nodes, shard_ranges[0].name + + return container_info['partition'], container_info['nodes'], None + @public @cors_validation @delay_denial @@ -273,8 +287,8 @@ class BaseObjectController(Controller): """HTTP POST request handler.""" container_info = self.container_info( self.account_name, self.container_name, req) - container_partition = container_info['partition'] - container_nodes = container_info['nodes'] + container_partition, container_nodes, container_path = \ + self._get_update_target(req, container_info) req.acl = container_info['write_acl'] if 'swift.authorize' in req.environ: aresp = req.environ['swift.authorize'](req) @@ -304,13 +318,14 @@ class BaseObjectController(Controller): headers = self._backend_requests( req, len(nodes), container_partition, container_nodes, - delete_at_container, delete_at_part, delete_at_nodes) + delete_at_container, delete_at_part, delete_at_nodes, + container_path=container_path) return self._post_object(req, obj_ring, partition, headers) def _backend_requests(self, req, n_outgoing, container_partition, containers, delete_at_container=None, delete_at_partition=None, - delete_at_nodes=None): + delete_at_nodes=None, container_path=None): policy_index = req.headers['X-Backend-Storage-Policy-Index'] policy = POLICIES.get_by_index(policy_index) headers = [self.generate_request_headers(req, additional=req.headers) @@ -324,6 +339,8 @@ class BaseObjectController(Controller): headers[index]['X-Container-Device'] = csv_append( headers[index].get('X-Container-Device'), container['device']) + if container_path: + 
headers[index]['X-Backend-Container-Path'] = container_path def set_delete_at_headers(index, delete_at_node): headers[index]['X-Delete-At-Container'] = delete_at_container @@ -752,8 +769,8 @@ class BaseObjectController(Controller): policy_index = req.headers.get('X-Backend-Storage-Policy-Index', container_info['storage_policy']) obj_ring = self.app.get_object_ring(policy_index) - container_nodes = container_info['nodes'] - container_partition = container_info['partition'] + container_partition, container_nodes, container_path = \ + self._get_update_target(req, container_info) partition, nodes = obj_ring.get_nodes( self.account_name, self.container_name, self.object_name) @@ -800,7 +817,8 @@ class BaseObjectController(Controller): # add special headers to be handled by storage nodes outgoing_headers = self._backend_requests( req, len(nodes), container_partition, container_nodes, - delete_at_container, delete_at_part, delete_at_nodes) + delete_at_container, delete_at_part, delete_at_nodes, + container_path=container_path) # send object to storage nodes resp = self._store_object( @@ -823,8 +841,8 @@ class BaseObjectController(Controller): next_part_power = getattr(obj_ring, 'next_part_power', None) if next_part_power: req.headers['X-Backend-Next-Part-Power'] = next_part_power - container_partition = container_info['partition'] - container_nodes = container_info['nodes'] + container_partition, container_nodes, container_path = \ + self._get_update_target(req, container_info) req.acl = container_info['write_acl'] req.environ['swift_sync_key'] = container_info['sync_key'] if 'swift.authorize' in req.environ: @@ -851,7 +869,8 @@ class BaseObjectController(Controller): node_count += local_handoffs headers = self._backend_requests( - req, node_count, container_partition, container_nodes) + req, node_count, container_partition, container_nodes, + container_path=container_path) return self._delete_object(req, obj_ring, partition, headers) diff --git a/test/__init__.py b/test/__init__.py index 1a56597158..51e3aa9d82 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -17,7 +17,11 @@ # The code below enables nosetests to work with i18n _() blocks from __future__ import print_function import sys +from contextlib import contextmanager + import os +from six import reraise + try: from unittest.util import safe_repr except ImportError: @@ -86,3 +90,26 @@ def listen_zero(): sock.bind(("127.0.0.1", 0)) sock.listen(50) return sock + + +@contextmanager +def annotate_failure(msg): + """ + Catch AssertionError and annotate it with a message. Useful when making + assertions in a loop where the message can indicate the loop index or + richer context about the failure. + + :param msg: A message to be prefixed to the AssertionError message. 
+ """ + try: + yield + except AssertionError as err: + err_typ, err_val, err_tb = sys.exc_info() + if err_val.args: + msg = '%s Failed with %s' % (msg, err_val.args[0]) + err_val.args = (msg, ) + err_val.args[1:] + else: + # workaround for some IDE's raising custom AssertionErrors + err_val = '%s Failed with %s' % (msg, err) + err_typ = AssertionError + reraise(err_typ, err_val, err_tb) diff --git a/test/probe/brain.py b/test/probe/brain.py index 843754210e..fd597cf6b3 100644 --- a/test/probe/brain.py +++ b/test/probe/brain.py @@ -99,9 +99,11 @@ class BrainSplitter(object): raise ValueError('Unknown server_type: %r' % server_type) self.server_type = server_type - part, nodes = self.ring.get_nodes(self.account, c, o) + self.part, self.nodes = self.ring.get_nodes(self.account, c, o) + + node_ids = [n['id'] for n in self.nodes] + self.node_numbers = [n + 1 for n in node_ids] - node_ids = [n['id'] for n in nodes] if all(n_id in node_ids for n_id in (0, 1)): self.primary_numbers = (1, 2) self.handoff_numbers = (3, 4) diff --git a/test/probe/common.py b/test/probe/common.py index ccb5751f26..5622d71b64 100644 --- a/test/probe/common.py +++ b/test/probe/common.py @@ -14,6 +14,8 @@ # limitations under the License. from __future__ import print_function + +import errno import os from subprocess import Popen, PIPE import sys @@ -125,13 +127,17 @@ def kill_server(ipport, ipport2server): if err: raise Exception('unable to kill %s' % (server if not number else '%s%s' % (server, number))) + return wait_for_server_to_hangup(ipport) + + +def wait_for_server_to_hangup(ipport): try_until = time() + 30 while True: try: conn = HTTPConnection(*ipport) conn.request('GET', '/') conn.getresponse() - except Exception as err: + except Exception: break if time() > try_until: raise Exception( @@ -334,33 +340,35 @@ class ProbeTest(unittest.TestCase): Don't instantiate this directly, use a child class instead. 
""" + def _load_rings_and_configs(self): + self.ipport2server = {} + self.configs = defaultdict(dict) + self.account_ring = get_ring( + 'account', + self.acct_cont_required_replicas, + self.acct_cont_required_devices, + ipport2server=self.ipport2server, + config_paths=self.configs) + self.container_ring = get_ring( + 'container', + self.acct_cont_required_replicas, + self.acct_cont_required_devices, + ipport2server=self.ipport2server, + config_paths=self.configs) + self.policy = get_policy(**self.policy_requirements) + self.object_ring = get_ring( + self.policy.ring_name, + self.obj_required_replicas, + self.obj_required_devices, + server='object', + ipport2server=self.ipport2server, + config_paths=self.configs) + def setUp(self): resetswift() kill_orphans() + self._load_rings_and_configs() try: - self.ipport2server = {} - self.configs = defaultdict(dict) - self.account_ring = get_ring( - 'account', - self.acct_cont_required_replicas, - self.acct_cont_required_devices, - ipport2server=self.ipport2server, - config_paths=self.configs) - self.container_ring = get_ring( - 'container', - self.acct_cont_required_replicas, - self.acct_cont_required_devices, - ipport2server=self.ipport2server, - config_paths=self.configs) - self.policy = get_policy(**self.policy_requirements) - self.object_ring = get_ring( - self.policy.ring_name, - self.obj_required_replicas, - self.obj_required_devices, - server='object', - ipport2server=self.ipport2server, - config_paths=self.configs) - self.servers_per_port = any( int(readconf(c, section_name='object-replicator').get( 'servers_per_port', '0')) @@ -489,6 +497,49 @@ class ProbeTest(unittest.TestCase): finally: shutil.rmtree(tempdir) + def get_all_object_nodes(self): + """ + Returns a list of all nodes in all object storage policies. + + :return: a list of node dicts. + """ + all_obj_nodes = {} + for policy in ENABLED_POLICIES: + for dev in policy.object_ring.devs: + all_obj_nodes[dev['device']] = dev + return all_obj_nodes.values() + + def gather_async_pendings(self, onodes): + """ + Returns a list of paths to async pending files found on given nodes. + + :param onodes: a list of nodes. + :return: a list of file paths. + """ + async_pendings = [] + for onode in onodes: + device_dir = self.device_dir('', onode) + for ap_pol_dir in os.listdir(device_dir): + if not ap_pol_dir.startswith('async_pending'): + # skip 'objects', 'containers', etc. + continue + async_pending_dir = os.path.join(device_dir, ap_pol_dir) + try: + ap_dirs = os.listdir(async_pending_dir) + except OSError as err: + if err.errno == errno.ENOENT: + pass + else: + raise + else: + for ap_dir in ap_dirs: + ap_dir_fullpath = os.path.join( + async_pending_dir, ap_dir) + async_pendings.extend([ + os.path.join(ap_dir_fullpath, ent) + for ent in os.listdir(ap_dir_fullpath)]) + return async_pendings + class ReplProbeTest(ProbeTest): diff --git a/test/probe/test_object_expirer.py b/test/probe/test_object_expirer.py index 92642f19d6..ad31662730 100644 --- a/test/probe/test_object_expirer.py +++ b/test/probe/test_object_expirer.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import errno -import os import random import time import uuid @@ -143,31 +141,6 @@ class TestObjectExpirer(ReplProbeTest): # tha the object server does not write out any async pendings; this # test asserts that this is the case. 
- def gather_async_pendings(onodes): - async_pendings = [] - for onode in onodes: - device_dir = self.device_dir('', onode) - for ap_pol_dir in os.listdir(device_dir): - if not ap_pol_dir.startswith('async_pending'): - # skip 'objects', 'containers', etc. - continue - async_pending_dir = os.path.join(device_dir, ap_pol_dir) - try: - ap_dirs = os.listdir(async_pending_dir) - except OSError as err: - if err.errno == errno.ENOENT: - pass - else: - raise - else: - for ap_dir in ap_dirs: - ap_dir_fullpath = os.path.join( - async_pending_dir, ap_dir) - async_pendings.extend([ - os.path.join(ap_dir_fullpath, ent) - for ent in os.listdir(ap_dir_fullpath)]) - return async_pendings - # Make an expiring object in each policy for policy in ENABLED_POLICIES: container_name = "expirer-test-%d" % policy.idx @@ -191,15 +164,12 @@ class TestObjectExpirer(ReplProbeTest): # Make sure there's no async_pendings anywhere. Probe tests only run # on single-node installs anyway, so this set should be small enough # that an exhaustive check doesn't take too long. - all_obj_nodes = {} - for policy in ENABLED_POLICIES: - for dev in policy.object_ring.devs: - all_obj_nodes[dev['device']] = dev - pendings_before = gather_async_pendings(all_obj_nodes.values()) + all_obj_nodes = self.get_all_object_nodes() + pendings_before = self.gather_async_pendings(all_obj_nodes) # expire the objects Manager(['object-expirer']).once() - pendings_after = gather_async_pendings(all_obj_nodes.values()) + pendings_after = self.gather_async_pendings(all_obj_nodes) self.assertEqual(pendings_after, pendings_before) def test_expirer_object_should_not_be_expired(self): diff --git a/test/probe/test_sharder.py b/test/probe/test_sharder.py new file mode 100644 index 0000000000..77ee3dd35b --- /dev/null +++ b/test/probe/test_sharder.py @@ -0,0 +1,2025 @@ +# Copyright (c) 2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import hashlib +import json +import os +import shutil +import uuid + +from nose import SkipTest + +from swift.common import direct_client +from swift.common.direct_client import DirectClientException +from swift.common.utils import ShardRange, parse_db_filename, get_db_files, \ + quorum_size, config_true_value, Timestamp +from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING +from swift.common import utils +from swift.common.manager import Manager +from swiftclient import client, get_auth, ClientException + +from swift.proxy.controllers.obj import num_container_updates +from test import annotate_failure +from test.probe.brain import BrainSplitter +from test.probe.common import ReplProbeTest, get_server_number, \ + wait_for_server_to_hangup + + +MIN_SHARD_CONTAINER_THRESHOLD = 4 +MAX_SHARD_CONTAINER_THRESHOLD = 100 + + +class ShardCollector(object): + """ + Returns map of node to tuples of (headers, shard ranges) returned from node + """ + def __init__(self): + self.ranges = {} + + def __call__(self, cnode, cpart, account, container): + self.ranges[cnode['id']] = direct_client.direct_get_container( + cnode, cpart, account, container, + headers={'X-Backend-Record-Type': 'shard'}) + + +class BaseTestContainerSharding(ReplProbeTest): + + def _maybe_skip_test(self): + try: + cont_configs = [utils.readconf(p, 'container-sharder') + for p in self.configs['container-server'].values()] + except ValueError: + raise SkipTest('No [container-sharder] section found in ' + 'container-server configs') + + skip_reasons = [] + auto_shard = all([config_true_value(c.get('auto_shard', False)) + for c in cont_configs]) + if not auto_shard: + skip_reasons.append( + 'auto_shard must be true in all container_sharder configs') + + self.max_shard_size = max( + int(c.get('shard_container_threshold', '1000000')) + for c in cont_configs) + + if not (MIN_SHARD_CONTAINER_THRESHOLD <= self.max_shard_size + <= MAX_SHARD_CONTAINER_THRESHOLD): + skip_reasons.append( + 'shard_container_threshold %d must be between %d and %d' % + (self.max_shard_size, MIN_SHARD_CONTAINER_THRESHOLD, + MAX_SHARD_CONTAINER_THRESHOLD)) + + def skip_check(reason_list, option, required): + values = set([int(c.get(option, required)) for c in cont_configs]) + if values != {required}: + reason_list.append('%s must be %s' % (option, required)) + + skip_check(skip_reasons, 'shard_scanner_batch_size', 10) + skip_check(skip_reasons, 'shard_batch_size', 2) + + if skip_reasons: + raise SkipTest(', '.join(skip_reasons)) + + def _load_rings_and_configs(self): + super(BaseTestContainerSharding, self)._load_rings_and_configs() + # perform checks for skipping test before starting services + self._maybe_skip_test() + + def _make_object_names(self, number): + return ['obj-%04d' % x for x in range(number)] + + def _setup_container_name(self): + self.container_name = 'container-%s' % uuid.uuid4() + + def setUp(self): + client.logger.setLevel(client.logging.WARNING) + client.requests.logging.getLogger().setLevel( + client.requests.logging.WARNING) + super(BaseTestContainerSharding, self).setUp() + _, self.admin_token = get_auth( + 'http://127.0.0.1:8080/auth/v1.0', 'admin:admin', 'admin') + self._setup_container_name() + self.brain = BrainSplitter(self.url, self.token, self.container_name, + None, 'container') + self.brain.put_container(policy_index=int(self.policy)) + self.sharders = Manager(['container-sharder']) + self.internal_client = self.make_internal_client() + + def stop_container_servers(self, node_numbers=None): + if node_numbers: + 
ipports = [] + server2ipport = {v: k for k, v in self.ipport2server.items()} + for number in self.brain.node_numbers[node_numbers]: + self.brain.servers.stop(number=number) + server = 'container%d' % number + ipports.append(server2ipport[server]) + else: + ipports = [k for k, v in self.ipport2server.items() + if v.startswith('container')] + self.brain.servers.stop() + for ipport in ipports: + wait_for_server_to_hangup(ipport) + + def put_objects(self, obj_names): + for obj in obj_names: + client.put_object(self.url, self.token, self.container_name, obj) + + def delete_objects(self, obj_names): + for obj in obj_names: + client.delete_object( + self.url, self.token, self.container_name, obj) + + def get_container_shard_ranges(self, account=None, container=None): + account = account if account else self.account + container = container if container else self.container_name + path = self.internal_client.make_path(account, container) + resp = self.internal_client.make_request( + 'GET', path + '?format=json', {'X-Backend-Record-Type': 'shard'}, + [200]) + return [ShardRange.from_dict(sr) for sr in json.loads(resp.body)] + + def direct_container_op(self, func, account=None, container=None, + expect_failure=False): + account = account if account else self.account + container = container if container else self.container_name + cpart, cnodes = self.container_ring.get_nodes(account, container) + unexpected_responses = [] + results = {} + for cnode in cnodes: + try: + results[cnode['id']] = func(cnode, cpart, account, container) + except DirectClientException as err: + if not expect_failure: + unexpected_responses.append((cnode, err)) + else: + if expect_failure: + unexpected_responses.append((cnode, 'success')) + if unexpected_responses: + self.fail('Unexpected responses: %s' % unexpected_responses) + return results + + def direct_get_container_shard_ranges(self, account=None, container=None, + expect_failure=False): + collector = ShardCollector() + self.direct_container_op( + collector, account, container, expect_failure) + return collector.ranges + + def direct_delete_container(self, account=None, container=None, + expect_failure=False): + self.direct_container_op(direct_client.direct_delete_container, + account, container, expect_failure) + + def direct_head_container(self, account=None, container=None, + expect_failure=False): + return self.direct_container_op(direct_client.direct_head_container, + account, container, expect_failure) + + def get_storage_dir(self, part, node, account=None, container=None): + account = account or self.brain.account + container = container or self.container_name + server_type, config_number = get_server_number( + (node['ip'], node['port']), self.ipport2server) + assert server_type == 'container' + repl_server = '%s-replicator' % server_type + conf = utils.readconf(self.configs[repl_server][config_number], + section_name=repl_server) + datadir = os.path.join(conf['devices'], node['device'], 'containers') + container_hash = utils.hash_path(account, container) + return (utils.storage_directory(datadir, part, container_hash), + container_hash) + + def get_broker(self, part, node, account=None, container=None): + container_dir, container_hash = self.get_storage_dir( + part, node, account=account, container=container) + db_file = os.path.join(container_dir, container_hash + '.db') + self.assertTrue(get_db_files(db_file)) # sanity check + return ContainerBroker(db_file) + + def categorize_container_dir_content(self, account=None, container=None): + account = account or 
self.brain.account + container = container or self.container_name + part, nodes = self.brain.ring.get_nodes(account, container) + storage_dirs = [ + self.get_storage_dir(part, node, account=account, + container=container)[0] + for node in nodes] + result = { + 'shard_dbs': [], + 'normal_dbs': [], + 'pendings': [], + 'locks': [], + 'other': [], + } + for storage_dir in storage_dirs: + for f in os.listdir(storage_dir): + path = os.path.join(storage_dir, f) + if path.endswith('.db'): + hash_, epoch, ext = parse_db_filename(path) + if epoch: + result['shard_dbs'].append(path) + else: + result['normal_dbs'].append(path) + elif path.endswith('.db.pending'): + result['pendings'].append(path) + elif path.endswith('/.lock'): + result['locks'].append(path) + else: + result['other'].append(path) + if result['other']: + self.fail('Found unexpected files in storage directory:\n %s' % + '\n '.join(result['other'])) + return result + + def assertLengthEqual(self, obj, length): + obj_len = len(obj) + self.assertEqual(obj_len, length, 'len(%r) == %d, not %d' % ( + obj, obj_len, length)) + + def assert_dict_contains(self, expected_items, actual_dict): + ignored = set(expected_items) ^ set(actual_dict) + filtered_actual = dict((k, actual_dict[k]) + for k in actual_dict if k not in ignored) + self.assertEqual(expected_items, filtered_actual) + + def assert_shard_ranges_contiguous(self, expected_number, shard_ranges, + first_lower='', last_upper=''): + if shard_ranges and isinstance(shard_ranges[0], ShardRange): + actual_shard_ranges = sorted(shard_ranges) + else: + actual_shard_ranges = sorted([ShardRange.from_dict(d) + for d in shard_ranges]) + self.assertLengthEqual(actual_shard_ranges, expected_number) + if expected_number: + with annotate_failure('Ranges %s.' % actual_shard_ranges): + self.assertEqual(first_lower, actual_shard_ranges[0].lower_str) + for x, y in zip(actual_shard_ranges, actual_shard_ranges[1:]): + self.assertEqual(x.upper, y.lower) + self.assertEqual(last_upper, actual_shard_ranges[-1].upper_str) + + def assert_shard_range_equal(self, expected, actual, excludes=None): + excludes = excludes or [] + expected_dict = dict(expected) + actual_dict = dict(actual) + for k in excludes: + expected_dict.pop(k, None) + actual_dict.pop(k, None) + self.assertEqual(expected_dict, actual_dict) + + def assert_shard_range_lists_equal(self, expected, actual, excludes=None): + self.assertEqual(len(expected), len(actual)) + for expected, actual in zip(expected, actual): + self.assert_shard_range_equal(expected, actual, excludes=excludes) + + def assert_shard_range_state(self, expected_state, shard_ranges): + if shard_ranges and not isinstance(shard_ranges[0], ShardRange): + shard_ranges = [ShardRange.from_dict(data) + for data in shard_ranges] + self.assertEqual([expected_state] * len(shard_ranges), + [sr.state for sr in shard_ranges]) + + def assert_total_object_count(self, expected_object_count, shard_ranges): + actual = sum([sr['object_count'] for sr in shard_ranges]) + self.assertEqual(expected_object_count, actual) + + def assert_container_listing(self, expected_listing): + headers, actual_listing = client.get_container( + self.url, self.token, self.container_name) + self.assertIn('x-container-object-count', headers) + expected_obj_count = len(expected_listing) + self.assertEqual(expected_listing, [ + x['name'].encode('utf-8') for x in actual_listing]) + self.assertEqual(str(expected_obj_count), + headers['x-container-object-count']) + return headers, actual_listing + + def 
assert_container_object_count(self, expected_obj_count): + headers = client.head_container( + self.url, self.token, self.container_name) + self.assertIn('x-container-object-count', headers) + self.assertEqual(str(expected_obj_count), + headers['x-container-object-count']) + + def assert_container_post_ok(self, meta_value): + key = 'X-Container-Meta-Assert-Post-Works' + headers = {key: meta_value} + client.post_container( + self.url, self.token, self.container_name, headers=headers) + resp_headers = client.head_container( + self.url, self.token, self.container_name) + self.assertEqual(meta_value, resp_headers.get(key.lower())) + + def assert_container_post_fails(self, meta_value): + key = 'X-Container-Meta-Assert-Post-Works' + headers = {key: meta_value} + with self.assertRaises(ClientException) as cm: + client.post_container( + self.url, self.token, self.container_name, headers=headers) + self.assertEqual(404, cm.exception.http_status) + + def assert_container_delete_fails(self): + with self.assertRaises(ClientException) as cm: + client.delete_container(self.url, self.token, self.container_name) + self.assertEqual(409, cm.exception.http_status) + + def assert_container_not_found(self): + with self.assertRaises(ClientException) as cm: + client.get_container(self.url, self.token, self.container_name) + self.assertEqual(404, cm.exception.http_status) + # check for headers leaking out while deleted + resp_headers = cm.exception.http_response_headers + self.assertNotIn('X-Container-Object-Count', resp_headers) + self.assertNotIn('X-Container-Bytes-Used', resp_headers) + self.assertNotIn('X-Timestamp', resp_headers) + self.assertNotIn('X-PUT-Timestamp', resp_headers) + + def assert_container_has_shard_sysmeta(self): + node_headers = self.direct_head_container() + for node_id, headers in node_headers.items(): + with annotate_failure('%s in %s' % (node_id, node_headers.keys())): + for k, v in headers.items(): + if k.lower().startswith('x-container-sysmeta-shard'): + break + else: + self.fail('No shard sysmeta found in %s' % headers) + + def assert_container_state(self, node, expected_state, num_shard_ranges): + headers, shard_ranges = direct_client.direct_get_container( + node, self.brain.part, self.account, self.container_name, + headers={'X-Backend-Record-Type': 'shard'}) + self.assertEqual(num_shard_ranges, len(shard_ranges)) + self.assertIn('X-Backend-Sharding-State', headers) + self.assertEqual( + expected_state, headers['X-Backend-Sharding-State']) + return [ShardRange.from_dict(sr) for sr in shard_ranges] + + def get_part_and_node_numbers(self, shard_range): + """Return the partition and node numbers for a shard range.""" + part, nodes = self.brain.ring.get_nodes( + shard_range.account, shard_range.container) + return part, [n['id'] + 1 for n in nodes] + + def run_sharders(self, shard_ranges): + """Run the sharder on partitions for given shard ranges.""" + if not isinstance(shard_ranges, (list, tuple, set)): + shard_ranges = (shard_ranges,) + partitions = ','.join(str(self.get_part_and_node_numbers(sr)[0]) + for sr in shard_ranges) + self.sharders.once(additional_args='--partitions=%s' % partitions) + + def run_sharder_sequentially(self, shard_range=None): + """Run sharder node by node on partition for given shard range.""" + if shard_range: + part, node_numbers = self.get_part_and_node_numbers(shard_range) + else: + part, node_numbers = self.brain.part, self.brain.node_numbers + for node_number in node_numbers: + self.sharders.once(number=node_number, + additional_args='--partitions=%s' % 
part) + + +class TestContainerShardingNonUTF8(BaseTestContainerSharding): + def test_sharding_listing(self): + # verify parameterised listing of a container during sharding + all_obj_names = self._make_object_names(4 * self.max_shard_size) + obj_names = all_obj_names[::2] + self.put_objects(obj_names) + # choose some names approx in middle of each expected shard range + markers = [ + obj_names[i] for i in range(self.max_shard_size / 4, + 2 * self.max_shard_size, + self.max_shard_size / 2)] + + def check_listing(objects, **params): + qs = '&'.join(['%s=%s' % param for param in params.items()]) + headers, listing = client.get_container( + self.url, self.token, self.container_name, query_string=qs) + listing = [x['name'].encode('utf-8') for x in listing] + if params.get('reverse'): + marker = params.get('marker', ShardRange.MAX) + end_marker = params.get('end_marker', ShardRange.MIN) + expected = [o for o in objects if end_marker < o < marker] + expected.reverse() + else: + marker = params.get('marker', ShardRange.MIN) + end_marker = params.get('end_marker', ShardRange.MAX) + expected = [o for o in objects if marker < o < end_marker] + if 'limit' in params: + expected = expected[:params['limit']] + self.assertEqual(expected, listing) + + def check_listing_precondition_fails(**params): + qs = '&'.join(['%s=%s' % param for param in params.items()]) + with self.assertRaises(ClientException) as cm: + client.get_container( + self.url, self.token, self.container_name, query_string=qs) + self.assertEqual(412, cm.exception.http_status) + return cm.exception + + def do_listing_checks(objects): + check_listing(objects) + check_listing(objects, marker=markers[0], end_marker=markers[1]) + check_listing(objects, marker=markers[0], end_marker=markers[2]) + check_listing(objects, marker=markers[1], end_marker=markers[3]) + check_listing(objects, marker=markers[1], end_marker=markers[3], + limit=self.max_shard_size / 4) + check_listing(objects, marker=markers[1], end_marker=markers[3], + limit=self.max_shard_size / 4) + check_listing(objects, marker=markers[1], end_marker=markers[2], + limit=self.max_shard_size / 2) + check_listing(objects, marker=markers[1], end_marker=markers[1]) + check_listing(objects, reverse=True) + check_listing(objects, reverse=True, end_marker=markers[1]) + check_listing(objects, reverse=True, marker=markers[3], + end_marker=markers[1], limit=self.max_shard_size / 4) + check_listing(objects, reverse=True, marker=markers[3], + end_marker=markers[1], limit=0) + check_listing([], marker=markers[0], end_marker=markers[0]) + check_listing([], marker=markers[0], end_marker=markers[1], + reverse=True) + check_listing(objects, prefix='obj') + check_listing([], prefix='zzz') + # delimiter + headers, listing = client.get_container( + self.url, self.token, self.container_name, + query_string='delimiter=-') + self.assertEqual([{'subdir': 'obj-'}], listing) + + limit = self.cluster_info['swift']['container_listing_limit'] + exc = check_listing_precondition_fails(limit=limit + 1) + self.assertIn('Maximum limit', exc.http_response_content) + exc = check_listing_precondition_fails(delimiter='ab') + self.assertIn('Bad delimiter', exc.http_response_content) + + # sanity checks + do_listing_checks(obj_names) + + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + # First run the 'leader' in charge of scanning, which finds all shard + # ranges and cleaves first two + 
self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + # Then run sharder on other nodes which will also cleave first two + # shard ranges + for n in self.brain.node_numbers[1:]: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + + # sanity check shard range states + for node in self.brain.nodes: + self.assert_container_state(node, 'sharding', 4) + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 4) + self.assert_shard_range_state(ShardRange.CLEAVED, shard_ranges[:2]) + self.assert_shard_range_state(ShardRange.CREATED, shard_ranges[2:]) + + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() # confirm no sysmeta deleted + self.assert_container_post_ok('sharding') + do_listing_checks(obj_names) + + # put some new objects spread through entire namespace + new_obj_names = all_obj_names[1::4] + self.put_objects(new_obj_names) + + # new objects that fell into the first two cleaved shard ranges are + # reported in listing, new objects in the yet-to-be-cleaved shard + # ranges are not yet included in listing + exp_obj_names = [o for o in obj_names + new_obj_names + if o <= shard_ranges[1].upper] + exp_obj_names += [o for o in obj_names + if o > shard_ranges[1].upper] + exp_obj_names.sort() + do_listing_checks(exp_obj_names) + + # run all the sharders again and the last two shard ranges get cleaved + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 4) + shard_ranges = self.get_container_shard_ranges() + self.assert_shard_range_state(ShardRange.ACTIVE, shard_ranges) + + exp_obj_names = obj_names + new_obj_names + exp_obj_names.sort() + do_listing_checks(exp_obj_names) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + + # delete original objects + self.delete_objects(obj_names) + do_listing_checks(new_obj_names) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + + +class TestContainerShardingUTF8(TestContainerShardingNonUTF8): + def _make_object_names(self, number): + # override default with names that include non-ascii chars + name_length = self.cluster_info['swift']['max_object_name_length'] + obj_names = [] + for x in range(number): + name = (u'obj-\u00e4\u00ea\u00ec\u00f2\u00fb-%04d' % x) + name = name.encode('utf8').ljust(name_length, 'o') + obj_names.append(name) + return obj_names + + def _setup_container_name(self): + # override default with max length name that includes non-ascii chars + super(TestContainerShardingUTF8, self)._setup_container_name() + name_length = self.cluster_info['swift']['max_container_name_length'] + cont_name = self.container_name + u'-\u00e4\u00ea\u00ec\u00f2\u00fb' + self.conainer_name = cont_name.encode('utf8').ljust(name_length, 'x') + + +class TestContainerSharding(BaseTestContainerSharding): + def _test_sharded_listing(self, run_replicators=False): + obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(obj_names) + + # Verify that we start out with normal DBs, no shards + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['normal_dbs'], 3) + self.assertLengthEqual(found['shard_dbs'], 0) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, 
broker.is_root_container()) + self.assertEqual('unsharded', broker.get_db_state()) + self.assertLengthEqual(broker.get_shard_ranges(), 0) + + headers, pre_sharding_listing = client.get_container( + self.url, self.token, self.container_name) + self.assertEqual(obj_names, [x['name'].encode('utf-8') + for x in pre_sharding_listing]) # sanity + + # Shard it + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + pre_sharding_headers = client.head_container( + self.url, self.admin_token, self.container_name) + self.assertEqual('True', + pre_sharding_headers.get('x-container-sharding')) + + # Only run the one in charge of scanning + self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # Verify that we have one sharded db -- though the other normal DBs + # received the shard ranges that got defined + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 1) + broker = ContainerBroker(found['shard_dbs'][0]) + # TODO: assert the shard db is on replica 0 + self.assertIs(True, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + orig_root_shard_ranges = [dict(sr) for sr in broker.get_shard_ranges()] + self.assertLengthEqual(orig_root_shard_ranges, 2) + self.assert_total_object_count(len(obj_names), orig_root_shard_ranges) + self.assert_shard_ranges_contiguous(2, orig_root_shard_ranges) + self.assertEqual([ShardRange.ACTIVE, ShardRange.ACTIVE], + [sr['state'] for sr in orig_root_shard_ranges]) + self.direct_delete_container(expect_failure=True) + + self.assertLengthEqual(found['normal_dbs'], 2) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual('unsharded', broker.get_db_state()) + # the sharded db had shard range meta_timestamps and state updated + # during cleaving, so we do not expect those to be equal on other + # nodes + self.assert_shard_range_lists_equal( + orig_root_shard_ranges, broker.get_shard_ranges(), + excludes=['meta_timestamp', 'state', 'state_timestamp']) + + if run_replicators: + Manager(['container-replicator']).once() + # replication doesn't change the db file names + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 1) + self.assertLengthEqual(found['normal_dbs'], 2) + + # Now that everyone has shard ranges, run *everyone* + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + + # Verify that we only have shard dbs now + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 0) + # Shards stayed the same + for db_file in found['shard_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + # Well, except for meta_timestamps, since the shards each reported + self.assert_shard_range_lists_equal( + orig_root_shard_ranges, broker.get_shard_ranges(), + excludes=['meta_timestamp', 'state_timestamp']) + for orig, updated in zip(orig_root_shard_ranges, + broker.get_shard_ranges()): + self.assertGreaterEqual(updated.state_timestamp, + orig['state_timestamp']) + self.assertGreaterEqual(updated.meta_timestamp, + orig['meta_timestamp']) + + # Check that entire listing is available + headers, actual_listing = self.assert_container_listing(obj_names) + # ... 
and check some other container properties + self.assertEqual(headers['last-modified'], + pre_sharding_headers['last-modified']) + + # It even works in reverse! + headers, listing = client.get_container(self.url, self.token, + self.container_name, + query_string='reverse=on') + self.assertEqual(pre_sharding_listing[::-1], listing) + + # Now put some new objects into first shard, taking its count to + # 3 shard ranges' worth + more_obj_names = [ + 'beta%03d' % x for x in range(self.max_shard_size)] + self.put_objects(more_obj_names) + + # The listing includes new objects... + headers, listing = self.assert_container_listing( + more_obj_names + obj_names) + self.assertEqual(pre_sharding_listing, listing[len(more_obj_names):]) + + # ...but root object count is out of date until the sharders run and + # update the root + self.assert_container_object_count(len(obj_names)) + + # run sharders on the shard to get root updated + shard_1 = ShardRange.from_dict(orig_root_shard_ranges[0]) + self.run_sharders(shard_1) + self.assert_container_object_count(len(more_obj_names + obj_names)) + + # we've added objects enough that we need to shard the first shard + # *again* into three new sub-shards, but nothing happens until the root + # leader identifies shard candidate... + root_shard_ranges = self.direct_get_container_shard_ranges() + for node, (hdrs, root_shards) in root_shard_ranges.items(): + self.assertLengthEqual(root_shards, 2) + with annotate_failure('node %s. ' % node): + self.assertEqual( + [ShardRange.ACTIVE] * 2, + [sr['state'] for sr in root_shards]) + # orig shards 0, 1 should be contiguous + self.assert_shard_ranges_contiguous(2, root_shards) + + # Now run the root leader to identify shard candidate...while one of + # the shard container servers is down + shard_1_part, shard_1_nodes = self.get_part_and_node_numbers(shard_1) + self.brain.servers.stop(number=shard_1_nodes[2]) + self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # ... so third replica of first shard state is not moved to sharding + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + self.assertEqual( + [ShardRange.SHARDING, ShardRange.SHARDING, ShardRange.ACTIVE], + [ContainerBroker(db_file).get_own_shard_range().state + for db_file in found_for_shard['normal_dbs']]) + + # ...then run first cycle of first shard sharders in order, leader + # first, to get to predictable state where all nodes have cleaved 2 out + # of 3 ranges...starting with first two nodes + for node_number in shard_1_nodes[:2]: + self.sharders.once( + number=node_number, + additional_args='--partitions=%s' % shard_1_part) + + # ... first two replicas start sharding to sub-shards + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['shard_dbs'], 2) + for db_file in found_for_shard['shard_dbs'][:2]: + broker = ContainerBroker(db_file) + with annotate_failure('shard db file %s. 
' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('sharding', broker.get_db_state()) + self.assertEqual( + ShardRange.SHARDING, broker.get_own_shard_range().state) + shard_shards = broker.get_shard_ranges() + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, + ShardRange.CREATED], + [sr.state for sr in shard_shards]) + self.assert_shard_ranges_contiguous( + 3, shard_shards, + first_lower=orig_root_shard_ranges[0]['lower'], + last_upper=orig_root_shard_ranges[0]['upper']) + + # but third replica still has no idea it should be sharding + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + self.assertEqual( + ShardRange.ACTIVE, + ContainerBroker( + found_for_shard['normal_dbs'][2]).get_own_shard_range().state) + + # ...but once sharder runs on third replica it will learn its state; + # note that any root replica on the stopped container server also won't + # know about the shards being in sharding state, so leave that server + # stopped for now so that shard fetches its state from an up-to-date + # root replica + self.sharders.once( + number=shard_1_nodes[2], + additional_args='--partitions=%s' % shard_1_part) + + # third replica is sharding but has no sub-shard ranges yet... + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['shard_dbs'], 2) + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + broker = ContainerBroker(found_for_shard['normal_dbs'][2]) + self.assertEqual('unsharded', broker.get_db_state()) + self.assertEqual( + ShardRange.SHARDING, broker.get_own_shard_range().state) + self.assertFalse(broker.get_shard_ranges()) + + # ...until sub-shard ranges are replicated from another shard replica; + # there may also be a sub-shard replica missing so run replicators on + # all nodes to fix that if necessary + self.brain.servers.start(number=shard_1_nodes[2]) + self.replicators.once() + + # now run sharder again on third replica + self.sharders.once( + number=shard_1_nodes[2], + additional_args='--partitions=%s' % shard_1_part) + + # check original first shard range state and sub-shards - all replicas + # should now be in consistent state + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['shard_dbs'], 3) + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + for db_file in found_for_shard['shard_dbs']: + broker = ContainerBroker(db_file) + with annotate_failure('shard db file %s. ' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('sharding', broker.get_db_state()) + self.assertEqual( + ShardRange.SHARDING, broker.get_own_shard_range().state) + shard_shards = broker.get_shard_ranges() + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, + ShardRange.CREATED], + [sr.state for sr in shard_shards]) + self.assert_shard_ranges_contiguous( + 3, shard_shards, + first_lower=orig_root_shard_ranges[0]['lower'], + last_upper=orig_root_shard_ranges[0]['upper']) + + # check third sub-shard is in created state + sub_shard = shard_shards[2] + found_for_sub_shard = self.categorize_container_dir_content( + sub_shard.account, sub_shard.container) + self.assertFalse(found_for_sub_shard['shard_dbs']) + self.assertLengthEqual(found_for_sub_shard['normal_dbs'], 3) + for db_file in found_for_sub_shard['normal_dbs']: + broker = ContainerBroker(db_file) + with annotate_failure('sub shard db file %s. 
' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('unsharded', broker.get_db_state()) + self.assertEqual( + ShardRange.CREATED, broker.get_own_shard_range().state) + self.assertFalse(broker.get_shard_ranges()) + + # check root shard ranges + root_shard_ranges = self.direct_get_container_shard_ranges() + for node, (hdrs, root_shards) in root_shard_ranges.items(): + self.assertLengthEqual(root_shards, 5) + with annotate_failure('node %s. ' % node): + # shard ranges are sorted by upper, state, lower, so expect: + # sub-shards, orig shard 0, orig shard 1 + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, + ShardRange.CREATED, ShardRange.SHARDING, + ShardRange.ACTIVE], + [sr['state'] for sr in root_shards]) + # sub-shards 0, 1, 2, orig shard 1 should be contiguous + self.assert_shard_ranges_contiguous( + 4, root_shards[:3] + root_shards[4:]) + # orig shards 0, 1 should be contiguous + self.assert_shard_ranges_contiguous(2, root_shards[3:]) + + self.assert_container_listing(more_obj_names + obj_names) + self.assert_container_object_count(len(more_obj_names + obj_names)) + + # add another object that lands in the first of the new sub-shards + self.put_objects(['alpha']) + + # TODO: assert that alpha is in the first new shard + self.assert_container_listing(['alpha'] + more_obj_names + obj_names) + # Run sharders again so things settle. + self.run_sharders(shard_1) + + # check original first shard range shards + for db_file in found_for_shard['shard_dbs']: + broker = ContainerBroker(db_file) + with annotate_failure('shard db file %s. ' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + self.assertEqual( + [ShardRange.ACTIVE] * 3, + [sr.state for sr in broker.get_shard_ranges()]) + # check root shard ranges + root_shard_ranges = self.direct_get_container_shard_ranges() + for node, (hdrs, root_shards) in root_shard_ranges.items(): + # old first shard range should have been deleted + self.assertLengthEqual(root_shards, 4) + with annotate_failure('node %s. ' % node): + self.assertEqual( + [ShardRange.ACTIVE] * 4, + [sr['state'] for sr in root_shards]) + self.assert_shard_ranges_contiguous(4, root_shards) + + headers, final_listing = self.assert_container_listing( + ['alpha'] + more_obj_names + obj_names) + + # check root + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 0) + new_shard_ranges = None + for db_file in found['shard_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + if new_shard_ranges is None: + new_shard_ranges = broker.get_shard_ranges( + include_deleted=True) + self.assertLengthEqual(new_shard_ranges, 5) + # Second half is still there, and unchanged + self.assertIn( + dict(orig_root_shard_ranges[1], meta_timestamp=None, + state_timestamp=None), + [dict(sr, meta_timestamp=None, state_timestamp=None) + for sr in new_shard_ranges]) + # But the first half split in three, then deleted + by_name = {sr.name: sr for sr in new_shard_ranges} + self.assertIn(orig_root_shard_ranges[0]['name'], by_name) + old_shard_range = by_name.pop( + orig_root_shard_ranges[0]['name']) + self.assertTrue(old_shard_range.deleted) + self.assert_shard_ranges_contiguous(4, by_name.values()) + else: + # Everyone's on the same page. 
Well, except for + # meta_timestamps, since the shards each reported + other_shard_ranges = broker.get_shard_ranges( + include_deleted=True) + self.assert_shard_range_lists_equal( + new_shard_ranges, other_shard_ranges, + excludes=['meta_timestamp', 'state_timestamp']) + for orig, updated in zip(orig_root_shard_ranges, + other_shard_ranges): + self.assertGreaterEqual(updated.meta_timestamp, + orig['meta_timestamp']) + + self.assert_container_delete_fails() + + for obj in final_listing: + client.delete_object( + self.url, self.token, self.container_name, obj['name']) + + # the objects won't be listed anymore + self.assert_container_listing([]) + # but root container stats will not yet be aware of the deletions + self.assert_container_delete_fails() + + # One server was down while the shard sharded its first two sub-shards, + # so there may be undeleted handoff db(s) for sub-shard(s) that were + # not fully replicated; run replicators now to clean up so they no + # longer report bogus stats to root. + self.replicators.once() + + # Run sharder so that shard containers update the root. Do not run + # sharder on root container because that triggers shrinks which can + # cause root object count to temporarily be non-zero and prevent the + # final delete. + self.run_sharders(self.get_container_shard_ranges()) + # then root is empty and can be deleted + self.assert_container_listing([]) + self.assert_container_object_count(0) + client.delete_container(self.url, self.token, self.container_name) + + def test_sharded_listing_no_replicators(self): + self._test_sharded_listing() + + def test_sharded_listing_with_replicators(self): + self._test_sharded_listing(run_replicators=True) + + def test_async_pendings(self): + obj_names = self._make_object_names(self.max_shard_size * 2) + + # There are some updates *everyone* gets + self.put_objects(obj_names[::5]) + # But roll some outages so each container only get ~2/5 more object + # records i.e. total of 3/5 updates per container; and async pendings + # pile up + for i, n in enumerate(self.brain.node_numbers, start=1): + self.brain.servers.stop(number=n) + self.put_objects(obj_names[i::5]) + self.brain.servers.start(number=n) + + # But there are also 1/5 updates *no one* gets + self.brain.servers.stop() + self.put_objects(obj_names[4::5]) + self.brain.servers.start() + + # Shard it + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + headers = client.head_container(self.url, self.admin_token, + self.container_name) + self.assertEqual('True', headers.get('x-container-sharding')) + + # sanity check + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 0) + self.assertLengthEqual(found['normal_dbs'], 3) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + + # Only run the 'leader' in charge of scanning. + # Each container has ~2 * max * 3/5 objects + # which are distributed from obj000 to obj<2 * max - 1>, + # so expect 3 shard ranges to be found: the first two will be complete + # shards with max/2 objects and lower/upper bounds spaced by approx: + # (2 * max - 1)/(2 * max * 3/5) * (max/2) =~ 5/6 * max + # + # Note that during this shard cycle the leader replicates to other + # nodes so they will end up with ~2 * max * 4/5 objects. 
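A worked instance of the estimate in the comment above, taking max = 100 for illustration (the largest shard_container_threshold the probe tests accept):

    max_size = 100
    rows_per_replica = 2 * max_size * 3 // 5   # ~120 object rows per container
    namespace_width = 2 * max_size             # names obj-0000 .. obj-0199
    rows_per_shard = max_size // 2             # first shards hold max/2 rows each
    names_per_shard = namespace_width * rows_per_shard // rows_per_replica
    print(names_per_shard)                     # 83, i.e. roughly 5/6 * max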
+ self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # Verify that we have one shard db -- though the other normal DBs + # received the shard ranges that got defined + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 1) + node_index_zero_db = found['shard_dbs'][0] + broker = ContainerBroker(node_index_zero_db) + self.assertIs(True, broker.is_root_container()) + self.assertEqual(SHARDING, broker.get_db_state()) + expected_shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(expected_shard_ranges, 3) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED], + [sr.state for sr in expected_shard_ranges]) + + # Still have all three big DBs -- we've only cleaved 2 of the 3 shard + # ranges that got defined + self.assertLengthEqual(found['normal_dbs'], 3) + db_states = [] + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + db_states.append(broker.get_db_state()) + # the sharded db had shard range meta_timestamps updated during + # cleaving, so we do not expect those to be equal on other nodes + self.assert_shard_range_lists_equal( + expected_shard_ranges, broker.get_shard_ranges(), + excludes=['meta_timestamp', 'state_timestamp', 'state']) + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + self.assertEqual([SHARDING, UNSHARDED, UNSHARDED], sorted(db_states)) + + # Run the other sharders so we're all in (roughly) the same state + for n in self.brain.node_numbers[1:]: + self.sharders.once( + number=n, + additional_args='--partitions=%s' % self.brain.part) + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 3) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertEqual(SHARDING, broker.get_db_state()) + # no new rows + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + + # Run updaters to clear the async pendings + Manager(['object-updater']).once() + + # Our "big" dbs didn't take updates + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + + # TODO: confirm that the updates got redirected to the shards + + # The entire listing is not yet available - we have two cleaved shard + # ranges, complete with async updates, but for the remainder of the + # namespace only what landed in the original container + headers, listing = client.get_container(self.url, self.token, + self.container_name) + start_listing = [ + o for o in obj_names if o <= expected_shard_ranges[1].upper] + self.assertEqual( + [x['name'].encode('utf-8') for x in listing[:len(start_listing)]], + start_listing) + # we can't assert much about the remaining listing, other than that + # there should be something + self.assertTrue( + [x['name'].encode('utf-8') for x in listing[len(start_listing):]]) + # Object count is hard to reason about though! + # TODO: nail down what this *should* be and make sure all containers + # respond with it! Depending on what you're looking at, this + # could be 0, 1/2, 7/12 (!?), 3/5, 2/3, or 4/5 or all objects! + # Apparently, it may not even be present at all! 
+ # self.assertIn('x-container-object-count', headers) + # self.assertEqual(headers['x-container-object-count'], + # str(len(obj_names) - len(obj_names) // 6)) + + # TODO: Doesn't work in reverse, yet + # headers, listing = client.get_container(self.url, self.token, + # self.container_name, + # query_string='reverse=on') + # self.assertEqual([x['name'].encode('utf-8') for x in listing], + # obj_names[::-1]) + + # Run the sharders again to get everything to settle + self.sharders.once() + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 0) + # now all shards have been cleaved we should get the complete listing + headers, listing = client.get_container(self.url, self.token, + self.container_name) + self.assertEqual([x['name'].encode('utf-8') for x in listing], + obj_names) + + def test_shrinking(self): + int_client = self.make_internal_client() + + def check_node_data(node_data, exp_hdrs, exp_obj_count, exp_shards): + hdrs, range_data = node_data + self.assert_dict_contains(exp_hdrs, hdrs) + self.assert_shard_ranges_contiguous(exp_shards, range_data) + self.assert_total_object_count(exp_obj_count, range_data) + + def check_shard_nodes_data(node_data, expected_state='unsharded', + expected_shards=0, exp_obj_count=0): + # checks that shard range is consistent on all nodes + root_path = '%s/%s' % (self.account, self.container_name) + exp_shard_hdrs = {'X-Container-Sysmeta-Shard-Root': root_path, + 'X-Backend-Sharding-State': expected_state} + object_counts = [] + bytes_used = [] + for node_id, node_data in node_data.items(): + with annotate_failure('Node id %s.' % node_id): + check_node_data( + node_data, exp_shard_hdrs, exp_obj_count, + expected_shards) + hdrs = node_data[0] + object_counts.append(int(hdrs['X-Container-Object-Count'])) + bytes_used.append(int(hdrs['X-Container-Bytes-Used'])) + if len(set(object_counts)) != 1: + self.fail('Inconsistent object counts: %s' % object_counts) + if len(set(bytes_used)) != 1: + self.fail('Inconsistent bytes used: %s' % bytes_used) + return object_counts[0], bytes_used[0] + + repeat = [0] + + def do_shard_then_shrink(): + repeat[0] += 1 + obj_names = ['obj-%s-%03d' % (repeat[0], x) + for x in range(self.max_shard_size)] + self.put_objects(obj_names) + # these two object names will fall at start of first shard range... 
+ alpha = 'alpha-%s' % repeat[0] + beta = 'beta-%s' % repeat[0] + + # Enable sharding + client.post_container( + self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + + # sanity check + self.assert_container_listing(obj_names) + + # Only run the one in charge of scanning + self.sharders.once( + number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # check root container + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + + # nodes on which sharder has not run are still in unsharded state + # but have had shard ranges replicated to them + exp_obj_count = len(obj_names) + exp_hdrs = {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': str(exp_obj_count)} + node_id = self.brain.node_numbers[1] - 1 + check_node_data( + root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2) + node_id = self.brain.node_numbers[2] - 1 + check_node_data( + root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2) + + # only one that ran sharder is in sharded state + exp_hdrs['X-Backend-Sharding-State'] = 'sharded' + node_id = self.brain.node_numbers[0] - 1 + check_node_data( + root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2) + + orig_range_data = root_nodes_data[node_id][1] + orig_shard_ranges = [ShardRange.from_dict(r) + for r in orig_range_data] + + # check first shard + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[0].account, orig_shard_ranges[0].container) + obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data) + total_shard_object_count = obj_count + + # check second shard + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[1].account, orig_shard_ranges[1].container) + obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data) + total_shard_object_count += obj_count + self.assertEqual(exp_obj_count, total_shard_object_count) + + # Now that everyone has shard ranges, run *everyone* + self.sharders.once( + additional_args='--partitions=%s' % self.brain.part) + + # all root container nodes should now be in sharded state + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + for node_id, node_data in root_nodes_data.items(): + with annotate_failure('Node id %s.' 
% node_id): + check_node_data(node_data, exp_hdrs, exp_obj_count, 2) + + # run updaters to update .sharded account; shard containers have + # not updated account since having objects replicated to them + self.updaters.once() + shard_cont_count, shard_obj_count = int_client.get_account_info( + orig_shard_ranges[0].account, [204]) + self.assertEqual(2 * repeat[0], shard_cont_count) + self.assertEqual(len(obj_names), shard_obj_count) + + # checking the listing also refreshes proxy container info cache so + # that the proxy becomes aware that container is sharded and will + # now look up the shard target for subsequent updates + self.assert_container_listing(obj_names) + + # delete objects from first shard range + first_shard_objects = [obj_name for obj_name in obj_names + if obj_name <= orig_shard_ranges[0].upper] + for obj in first_shard_objects: + client.delete_object( + self.url, self.token, self.container_name, obj) + with self.assertRaises(ClientException): + client.get_object( + self.url, self.token, self.container_name, obj) + + second_shard_objects = [obj_name for obj_name in obj_names + if obj_name > orig_shard_ranges[1].lower] + self.assert_container_listing(second_shard_objects) + + self.put_objects([alpha]) + second_shard_objects = [obj_name for obj_name in obj_names + if obj_name > orig_shard_ranges[1].lower] + self.assert_container_listing([alpha] + second_shard_objects) + + # while container servers are down, but proxy has container info in + # cache from recent listing, put another object; this update will + # lurk in async pending until the updaters run again + # TODO: because all the root container servers are down and + # therefore cannot respond to a GET for a redirect target, the + # object update will default to being targeted at the root + # container; can we provoke an object update that does get targeted + # to the shard, but fails to update shard, so that the async + # pending will first be directed to the shard when the updaters + # run? + self.stop_container_servers() + self.put_objects([beta]) + self.brain.servers.start() + async_pendings = self.gather_async_pendings( + self.get_all_object_nodes()) + num_container_replicas = len(self.brain.nodes) + num_obj_replicas = self.policy.object_ring.replica_count + expected_num_updates = num_container_updates( + num_container_replicas, quorum_size(num_container_replicas), + num_obj_replicas, self.policy.quorum) + expected_num_pendings = min(expected_num_updates, num_obj_replicas) + # sanity check + with annotate_failure('policy %s. ' % self.policy): + self.assertLengthEqual(async_pendings, expected_num_pendings) + + # root object count is not updated... + self.assert_container_object_count(len(obj_names)) + self.assert_container_listing([alpha] + second_shard_objects) + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + for node_id, node_data in root_nodes_data.items(): + with annotate_failure('Node id %s.' 
% node_id): + check_node_data(node_data, exp_hdrs, exp_obj_count, 2) + range_data = node_data[1] + self.assert_shard_range_lists_equal( + orig_range_data, range_data, + excludes=['meta_timestamp', 'state_timestamp']) + + # ...until the sharders run and update root + self.run_sharders(orig_shard_ranges[0]) + exp_obj_count = len(second_shard_objects) + 1 + self.assert_container_object_count(exp_obj_count) + self.assert_container_listing([alpha] + second_shard_objects) + + # root sharder finds donor, acceptor pair and pushes changes + self.sharders.once( + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_listing([alpha] + second_shard_objects) + # run sharder on donor to shrink and replicate to acceptor + self.run_sharders(orig_shard_ranges[0]) + self.assert_container_listing([alpha] + second_shard_objects) + # run sharder on acceptor to update root with stats + self.run_sharders(orig_shard_ranges[1]) + self.assert_container_listing([alpha] + second_shard_objects) + self.assert_container_object_count(len(second_shard_objects) + 1) + + # check root container + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + exp_hdrs['X-Container-Object-Count'] = str(exp_obj_count) + for node_id, node_data in root_nodes_data.items(): + with annotate_failure('Node id %s.' % node_id): + # NB now only *one* shard range in root + check_node_data(node_data, exp_hdrs, exp_obj_count, 1) + + # the acceptor shard is intact... + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[1].account, orig_shard_ranges[1].container) + obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data) + # all objects should now be in this shard + self.assertEqual(exp_obj_count, obj_count) + + # the donor shard is also still intact + # TODO: once we have figured out when these redundant donors are + # deleted, test for deletion/clean up + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[0].account, orig_shard_ranges[0].container) + # the donor's shard range will have the acceptor's projected stats + obj_count, bytes_used = check_shard_nodes_data( + shard_nodes_data, expected_state='sharded', expected_shards=1, + exp_obj_count=len(second_shard_objects) + 1) + # but the donor is empty and so reports zero stats + self.assertEqual(0, obj_count) + self.assertEqual(0, bytes_used) + + # delete all the second shard's objects apart from 'alpha' + for obj in second_shard_objects: + client.delete_object( + self.url, self.token, self.container_name, obj) + + self.assert_container_listing([alpha]) + + # run sharders so the second range shrinks away; requires up to 3 + # cycles + self.sharders.once() # shard updates root stats + self.assert_container_listing([alpha]) + self.sharders.once() # root finds shrinkable shard + self.assert_container_listing([alpha]) + self.sharders.once() # shards shrink themselves + self.assert_container_listing([alpha]) + + # the second shard range has sharded and is empty + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[1].account, orig_shard_ranges[1].container) + check_shard_nodes_data( + shard_nodes_data, expected_state='sharded', expected_shards=1, + exp_obj_count=1) + + # check root container + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + exp_hdrs = {'X-Backend-Sharding-State': 'collapsed', + # just the alpha object + 'X-Container-Object-Count': '1'} + for node_id, node_data in
root_nodes_data.items(): + with annotate_failure('Node id %s.' % node_id): + # NB now no shard ranges in root + check_node_data(node_data, exp_hdrs, 0, 0) + + # delete the alpha object + client.delete_object( + self.url, self.token, self.container_name, alpha) + # should now be able to delete the *apparently* empty container + client.delete_container(self.url, self.token, self.container_name) + self.assert_container_not_found() + self.direct_head_container(expect_failure=True) + + # and the container stays deleted even after sharders run and shard + # send updates + self.sharders.once() + self.assert_container_not_found() + self.direct_head_container(expect_failure=True) + + # now run updaters to deal with the async pending for the beta + # object + self.updaters.once() + # and the container is revived! + self.assert_container_listing([beta]) + + # finally, clear out the container + client.delete_object( + self.url, self.token, self.container_name, beta) + + do_shard_then_shrink() + # repeat from starting point of a collapsed and previously deleted + # container + do_shard_then_shrink() + + def _setup_replication_scenario(self, num_shards, extra_objs=('alpha',)): + # Get cluster to state where 2 replicas are sharding or sharded but 3rd + # replica is unsharded and has an object that the first 2 are missing. + + # put objects while all servers are up + obj_names = self._make_object_names( + num_shards * self.max_shard_size / 2) + self.put_objects(obj_names) + + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + node_numbers = self.brain.node_numbers + + # run replicators first time to get sync points set + self.replicators.once() + + # stop the leader node and one other server + self.stop_container_servers(slice(0, 2)) + + # ...then put one more object in first shard range namespace + self.put_objects(extra_objs) + + # start leader and first other server, stop third server + for number in node_numbers[:2]: + self.brain.servers.start(number=number) + self.brain.servers.stop(number=node_numbers[2]) + self.assert_container_listing(obj_names) # sanity check + + # shard the container - first two shard ranges are cleaved + for number in node_numbers[:2]: + self.sharders.once( + number=number, + additional_args='--partitions=%s' % self.brain.part) + + self.assert_container_listing(obj_names) # sanity check + return obj_names + + def test_replication_to_sharding_container(self): + # verify that replication from an unsharded replica to a sharding + # replica does not replicate rows but does replicate shard ranges + obj_names = self._setup_replication_scenario(3) + for node in self.brain.nodes[:2]: + self.assert_container_state(node, 'sharding', 3) + + # bring third server back up, run replicator + node_numbers = self.brain.node_numbers + self.brain.servers.start(number=node_numbers[2]) + # sanity check... 
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 0) + self.replicators.once(number=node_numbers[2]) + # check db files unchanged + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 2) + self.assertLengthEqual(found['normal_dbs'], 3) + + # the 'alpha' object is NOT replicated to the two sharded nodes + for node in self.brain.nodes[:2]: + broker = self.get_broker(self.brain.part, node) + with annotate_failure( + 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])): + self.assertFalse(broker.get_objects()) + self.assert_container_state(node, 'sharding', 3) + self.brain.servers.stop(number=node_numbers[2]) + self.assert_container_listing(obj_names) + + # all nodes now have shard ranges + self.brain.servers.start(number=node_numbers[2]) + node_data = self.direct_get_container_shard_ranges() + for node, (hdrs, shard_ranges) in node_data.items(): + with annotate_failure(node): + self.assert_shard_ranges_contiguous(3, shard_ranges) + + # complete cleaving third shard range on first two nodes + self.brain.servers.stop(number=node_numbers[2]) + for number in node_numbers[:2]: + self.sharders.once( + number=number, + additional_args='--partitions=%s' % self.brain.part) + # ...and now they are in sharded state + self.assert_container_state(self.brain.nodes[0], 'sharded', 3) + self.assert_container_state(self.brain.nodes[1], 'sharded', 3) + # ...still no 'alpha' object in listing + self.assert_container_listing(obj_names) + + # run the sharder on the third server, alpha object is included in + # shards that it cleaves + self.brain.servers.start(number=node_numbers[2]) + self.assert_container_state(self.brain.nodes[2], 'unsharded', 3) + self.sharders.once(number=node_numbers[2], + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(self.brain.nodes[2], 'sharding', 3) + self.sharders.once(number=node_numbers[2], + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(self.brain.nodes[2], 'sharded', 3) + self.assert_container_listing(['alpha'] + obj_names) + + def test_replication_to_sharded_container(self): + # verify that replication from an unsharded replica to a sharded + # replica does not replicate rows but does replicate shard ranges + obj_names = self._setup_replication_scenario(2) + for node in self.brain.nodes[:2]: + self.assert_container_state(node, 'sharded', 2) + + # sanity check + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 2) + self.assertLengthEqual(found['normal_dbs'], 1) + for node in self.brain.nodes[:2]: + broker = self.get_broker(self.brain.part, node) + info = broker.get_info() + with annotate_failure( + 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])): + self.assertEqual(len(obj_names), info['object_count']) + self.assertFalse(broker.get_objects()) + + # bring third server back up, run replicator + node_numbers = self.brain.node_numbers + self.brain.servers.start(number=node_numbers[2]) + # sanity check... 
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 0) + self.replicators.once(number=node_numbers[2]) + # check db files unchanged + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 2) + self.assertLengthEqual(found['normal_dbs'], 1) + + # the 'alpha' object is NOT replicated to the two sharded nodes + for node in self.brain.nodes[:2]: + broker = self.get_broker(self.brain.part, node) + with annotate_failure( + 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])): + self.assertFalse(broker.get_objects()) + self.assert_container_state(node, 'sharded', 2) + self.brain.servers.stop(number=node_numbers[2]) + self.assert_container_listing(obj_names) + + # all nodes now have shard ranges + self.brain.servers.start(number=node_numbers[2]) + node_data = self.direct_get_container_shard_ranges() + for node, (hdrs, shard_ranges) in node_data.items(): + with annotate_failure(node): + self.assert_shard_ranges_contiguous(2, shard_ranges) + + # run the sharder on the third server, alpha object is included in + # shards that it cleaves + self.assert_container_state(self.brain.nodes[2], 'unsharded', 2) + self.sharders.once(number=node_numbers[2], + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(self.brain.nodes[2], 'sharded', 2) + self.assert_container_listing(['alpha'] + obj_names) + + def test_sharding_requires_sufficient_replication(self): + # verify that cleaving only progresses if each cleaved shard range is + # sufficiently replicated + + # put enough objects for 4 shard ranges + obj_names = self._make_object_names(2 * self.max_shard_size) + self.put_objects(obj_names) + + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + node_numbers = self.brain.node_numbers + leader_node = self.brain.nodes[0] + leader_num = node_numbers[0] + + # run replicators first time to get sync points set + self.replicators.once() + + # start sharding on the leader node + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + shard_ranges = self.assert_container_state(leader_node, 'sharding', 4) + self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2, + [sr.state for sr in shard_ranges]) + + # stop *all* container servers for third shard range + sr_part, sr_node_nums = self.get_part_and_node_numbers(shard_ranges[2]) + for node_num in sr_node_nums: + self.brain.servers.stop(number=node_num) + + # attempt to continue sharding on the leader node + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + + # no cleaving progress was made + for node_num in sr_node_nums: + self.brain.servers.start(number=node_num) + shard_ranges = self.assert_container_state(leader_node, 'sharding', 4) + self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2, + [sr.state for sr in shard_ranges]) + + # stop two of the servers for third shard range, not including any + # server that happens to be the leader node + stopped = [] + for node_num in sr_node_nums: + if node_num != leader_num: + self.brain.servers.stop(number=node_num) + stopped.append(node_num) + if len(stopped) >= 2: + break + self.assertLengthEqual(stopped, 2) # sanity check + + # attempt to continue sharding on the leader node + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + + # no cleaving progress was made + for node_num in stopped: + 
self.brain.servers.start(number=node_num) + shard_ranges = self.assert_container_state(leader_node, 'sharding', 4) + self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2, + [sr.state for sr in shard_ranges]) + + # stop just one of the servers for third shard range + stopped = [] + for node_num in sr_node_nums: + if node_num != leader_num: + self.brain.servers.stop(number=node_num) + stopped.append(node_num) + break + self.assertLengthEqual(stopped, 1) # sanity check + + # attempt to continue sharding the container + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + + # this time cleaving completed + self.brain.servers.start(number=stopped[0]) + shard_ranges = self.assert_container_state(leader_node, 'sharded', 4) + self.assertEqual([ShardRange.ACTIVE] * 4, + [sr.state for sr in shard_ranges]) + + def test_sharded_delete(self): + all_obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(all_obj_names) + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + for n in self.brain.node_numbers: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + # sanity checks + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 2) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + self.assert_container_listing(all_obj_names) + + # delete all objects - updates redirected to shards + self.delete_objects(all_obj_names) + self.assert_container_listing([]) + self.assert_container_post_ok('has objects') + # root not yet updated with shard stats + self.assert_container_object_count(len(all_obj_names)) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + + # run sharder on shard containers to update root stats + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.run_sharders(shard_ranges) + self.assert_container_listing([]) + self.assert_container_post_ok('empty') + self.assert_container_object_count(0) + + # put a new object - update redirected to shard + self.put_objects(['alpha']) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) + + # before root learns about new object in shard, delete the container + client.delete_container(self.url, self.token, self.container_name) + self.assert_container_post_fails('deleted') + self.assert_container_not_found() + + # run the sharders to update root with shard stats + self.run_sharders(shard_ranges) + + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + self.assert_container_delete_fails() + self.assert_container_post_ok('revived') + + def test_object_update_redirection(self): + all_obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(all_obj_names) + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + for n in self.brain.node_numbers: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + # sanity checks + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 2) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + self.assert_container_listing(all_obj_names) + + # delete all objects - updates 
redirected to shards + self.delete_objects(all_obj_names) + self.assert_container_listing([]) + self.assert_container_post_ok('has objects') + + # run sharder on shard containers to update root stats + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.run_sharders(shard_ranges) + self.assert_container_object_count(0) + + # First, test a misplaced object moving from one shard to another. + # with one shard server down, put a new 'alpha' object... + shard_part, shard_nodes = self.get_part_and_node_numbers( + shard_ranges[0]) + self.brain.servers.stop(number=shard_nodes[2]) + self.put_objects(['alpha']) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) + self.assertLengthEqual( + self.gather_async_pendings(self.get_all_object_nodes()), 1) + self.brain.servers.start(number=shard_nodes[2]) + + # run sharder on root to discover first shrink candidate + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on the shard node without the alpha object + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=shard_nodes[2]) + # root sees first shard has shrunk, only second shard range used for + # listing so alpha object not in listing + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + self.assert_container_listing([]) + self.assert_container_object_count(0) + + # run the updaters: the async pending update will be redirected from + # shrunk shard to second shard + self.updaters.once() + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) # root not yet updated + + # then run sharder on other shard nodes to complete shrinking + for number in shard_nodes[:2]: + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=number) + # and get root updated + self.run_sharders(shard_ranges[1]) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + + # Now we have just one active shard, test a misplaced object moving + # from that shard to the root. + # with one shard server down, delete 'alpha' and put a 'beta' object... 
+ shard_part, shard_nodes = self.get_part_and_node_numbers( + shard_ranges[1]) + self.brain.servers.stop(number=shard_nodes[2]) + self.delete_objects(['alpha']) + self.put_objects(['beta']) + self.assert_container_listing(['beta']) + self.assert_container_object_count(1) + self.assertLengthEqual( + self.gather_async_pendings(self.get_all_object_nodes()), 2) + self.brain.servers.start(number=shard_nodes[2]) + + # run sharder on root to discover second shrink candidate - root is not + # yet aware of the beta object + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on the shard node without the beta object, to shrink + # it to root - note this moves stale copy of alpha to the root db + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=shard_nodes[2]) + # now there are no active shards + self.assertFalse(self.get_container_shard_ranges()) + + # with other two shard servers down, listing won't find beta object + for number in shard_nodes[:2]: + self.brain.servers.stop(number=number) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + + # run the updaters: the async pending update will be redirected from + # shrunk shard to the root + self.updaters.once() + self.assert_container_listing(['beta']) + self.assert_container_object_count(1) + + def test_misplaced_object_movement(self): + def merge_object(shard_range, name, deleted=0): + # it's hard to get a test to put a misplaced object into a shard, + # so this hack is used to force an object record directly into a shard + # container db. Note: the actual object won't exist, we're just + # using this to test object records in container dbs. + shard_part, shard_nodes = self.brain.ring.get_nodes( + shard_range.account, shard_range.container) + shard_broker = self.get_broker( + shard_part, shard_nodes[0], shard_range.account, + shard_range.container) + shard_broker.merge_items( + [{'name': name, 'created_at': Timestamp.now().internal, + 'size': 0, 'content_type': 'text/plain', + 'etag': hashlib.md5().hexdigest(), 'deleted': deleted}]) + return shard_nodes[0] + + all_obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(all_obj_names) + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + for n in self.brain.node_numbers: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + # sanity checks + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 2) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + self.assert_container_listing(all_obj_names) + + # delete all objects - updates redirected to shards + self.delete_objects(all_obj_names) + self.assert_container_listing([]) + self.assert_container_post_ok('has objects') + + # run sharder on shard containers to update root stats + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.run_sharders(shard_ranges) + self.assert_container_object_count(0) + + # First, test a misplaced object moving from one shard to another.
+ # run sharder on root to discover first shrink candidate + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on first shard range to shrink it + shard_part, shard_nodes_numbers = self.get_part_and_node_numbers( + shard_ranges[0]) + self.sharders.once(additional_args='--partitions=%s' % shard_part) + # force a misplaced object into the shrunken shard range to simulate + # a client put that was in flight when it started to shrink + misplaced_node = merge_object(shard_ranges[0], 'alpha', deleted=0) + # root sees first shard has shrunk, only second shard range used for + # listing so alpha object not in listing + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + self.assert_container_listing([]) + self.assert_container_object_count(0) + # until sharder runs on that node to move the misplaced object to the + # second shard range + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=misplaced_node['id'] + 1) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) # root not yet updated + + # run sharder to get root updated + self.run_sharders(shard_ranges[1]) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + + # Now we have just one active shard, test a misplaced object moving + # from that shard to the root. + # run sharder on root to discover second shrink candidate + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on the shard node to shrink it to root - note this + # moves alpha to the root db + shard_part, shard_nodes_numbers = self.get_part_and_node_numbers( + shard_ranges[1]) + self.sharders.once(additional_args='--partitions=%s' % shard_part) + # now there are no active shards + self.assertFalse(self.get_container_shard_ranges()) + + # force some misplaced object updates into second shrunk shard range + merge_object(shard_ranges[1], 'alpha', deleted=1) + misplaced_node = merge_object(shard_ranges[1], 'beta', deleted=0) + # root is not yet aware of them + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + # until sharder runs on that node to move the misplaced object + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=misplaced_node['id'] + 1) + self.assert_container_listing(['beta']) + self.assert_container_object_count(1) + self.assert_container_delete_fails() + + def test_replication_to_sharded_container_from_unsharded_old_primary(self): + primary_ids = [n['id'] for n in self.brain.nodes] + handoff_node = next(n for n in self.brain.ring.devs + if n['id'] not in primary_ids) + + # start with two sharded replicas and one unsharded with extra object + obj_names = self._setup_replication_scenario(2) + for node in self.brain.nodes[:2]: + self.assert_container_state(node, 'sharded', 2) + + # Fake a ring change - copy unsharded db which has no shard ranges to a + # handoff to create illusion of a new unpopulated primary node + node_numbers = self.brain.node_numbers + new_primary_node = self.brain.nodes[2] + new_primary_node_number = node_numbers[2] + new_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, new_primary_node) + old_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, handoff_node) + utils.mkdirs(os.path.dirname(old_primary_dir)) + os.rename(new_primary_dir, old_primary_dir) + + # make the cluster more or less "healthy" again + 
self.brain.servers.start(number=new_primary_node_number) + + # get a db on every node... + client.put_container(self.url, self.token, self.container_name) + self.assertTrue(os.path.exists(os.path.join( + new_primary_dir, container_hash + '.db'))) + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['normal_dbs'], 1) # "new" primary + self.assertLengthEqual(found['shard_dbs'], 2) # existing primaries + + # catastrophic failure! drive dies and is replaced on unchanged primary + failed_node = self.brain.nodes[0] + failed_dir, _container_hash = self.get_storage_dir( + self.brain.part, failed_node) + shutil.rmtree(failed_dir) + + # replicate the "old primary" to everybody except the "new primary" + self.brain.servers.stop(number=new_primary_node_number) + self.replicators.once(number=handoff_node['id'] + 1) + + # We're willing to rsync the retiring db to the failed primary. + # This may or may not have shard ranges, depending on the order in + # which we hit the primaries, but it definitely *doesn't* have an + # epoch in its name yet. All objects are replicated. + self.assertTrue(os.path.exists(os.path.join( + failed_dir, container_hash + '.db'))) + self.assertLengthEqual(os.listdir(failed_dir), 1) + broker = self.get_broker(self.brain.part, failed_node) + self.assertLengthEqual(broker.get_objects(), len(obj_names) + 1) + + # The other out-of-date primary is within usync range but objects are + # not replicated to it because the handoff db learns about shard ranges + broker = self.get_broker(self.brain.part, self.brain.nodes[1]) + self.assertLengthEqual(broker.get_objects(), 0) + + # Handoff db still exists and now has shard ranges! + self.assertTrue(os.path.exists(os.path.join( + old_primary_dir, container_hash + '.db'))) + broker = self.get_broker(self.brain.part, handoff_node) + shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.assert_container_state(handoff_node, 'unsharded', 2) + + # Replicate again, this time *including* "new primary" + self.brain.servers.start(number=new_primary_node_number) + self.replicators.once(number=handoff_node['id'] + 1) + + # Ordinarily, we would have rsync_then_merge'd to "new primary" + # but instead we wait + broker = self.get_broker(self.brain.part, new_primary_node) + self.assertLengthEqual(broker.get_objects(), 0) + shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + + # so the next time the sharder comes along, it can push rows out + # and delete the big db + self.sharders.once(number=handoff_node['id'] + 1, + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(handoff_node, 'sharded', 2) + self.assertFalse(os.path.exists(os.path.join( + old_primary_dir, container_hash + '.db'))) + # the sharded db hangs around until replication confirms durability + # first attempt is not sufficiently successful + self.brain.servers.stop(number=node_numbers[0]) + self.replicators.once(number=handoff_node['id'] + 1) + self.assertTrue(os.path.exists(old_primary_dir)) + self.assert_container_state(handoff_node, 'sharded', 2) + # second attempt is successful and handoff db is deleted + self.brain.servers.start(number=node_numbers[0]) + self.replicators.once(number=handoff_node['id'] + 1) + self.assertFalse(os.path.exists(old_primary_dir)) + + # run all the sharders, get us into a consistent state + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + self.assert_container_listing(['alpha'] + obj_names) + + def 
test_replication_to_empty_new_primary_from_sharding_old_primary(self): + primary_ids = [n['id'] for n in self.brain.nodes] + handoff_node = next(n for n in self.brain.ring.devs + if n['id'] not in primary_ids) + num_shards = 3 + obj_names = self._make_object_names( + num_shards * self.max_shard_size / 2) + self.put_objects(obj_names) + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + + # run replicators first time to get sync points set + self.replicators.once() + # start sharding on only the leader node + leader_node = self.brain.nodes[0] + leader_node_number = self.brain.node_numbers[0] + self.sharders.once(number=leader_node_number) + self.assert_container_state(leader_node, 'sharding', 3) + for node in self.brain.nodes[1:]: + self.assert_container_state(node, 'unsharded', 3) + + # Fake a ring change - copy leader node db to a handoff to create + # illusion of a new unpopulated primary leader node + new_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, leader_node) + old_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, handoff_node) + utils.mkdirs(os.path.dirname(old_primary_dir)) + os.rename(new_primary_dir, old_primary_dir) + self.assert_container_state(handoff_node, 'sharding', 3) + + # run replicator on handoff node to create a fresh db on new primary + self.assertFalse(os.path.exists(new_primary_dir)) + self.replicators.once(number=handoff_node['id'] + 1) + self.assertTrue(os.path.exists(new_primary_dir)) + self.assert_container_state(leader_node, 'sharded', 3) + broker = self.get_broker(self.brain.part, leader_node) + shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(shard_ranges, 3) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED], + [sr.state for sr in shard_ranges]) + + # db still exists on handoff + self.assertTrue(os.path.exists(old_primary_dir)) + self.assert_container_state(handoff_node, 'sharding', 3) + # continue sharding it... 
+ self.sharders.once(number=handoff_node['id'] + 1) + self.assert_container_state(leader_node, 'sharded', 3) + # now handoff is fully sharded the replicator will delete it + self.replicators.once(number=handoff_node['id'] + 1) + self.assertFalse(os.path.exists(old_primary_dir)) + + # all primaries now have active shard ranges but only one is in sharded + # state + self.assert_container_state(leader_node, 'sharded', 3) + for node in self.brain.nodes[1:]: + self.assert_container_state(node, 'unsharded', 3) + node_data = self.direct_get_container_shard_ranges() + for node_id, (hdrs, shard_ranges) in node_data.items(): + with annotate_failure( + 'node id %s from %s' % (node_id, node_data.keys)): + self.assert_shard_range_state(ShardRange.ACTIVE, shard_ranges) + + # check handoff cleaved all objects before it was deleted - stop all + # but leader node so that listing is fetched from shards + for number in self.brain.node_numbers[1:3]: + self.brain.servers.stop(number=number) + + self.assert_container_listing(obj_names) + + for number in self.brain.node_numbers[1:3]: + self.brain.servers.start(number=number) + + self.sharders.once() + self.assert_container_state(leader_node, 'sharded', 3) + for node in self.brain.nodes[1:]: + self.assert_container_state(node, 'sharding', 3) + self.sharders.once() + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 3) + + self.assert_container_listing(obj_names) diff --git a/test/unit/__init__.py b/test/unit/__init__.py index a07b1b2879..278c55a4ca 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -751,6 +751,8 @@ class FakeStatus(object): :param response_sleep: float, time to eventlet sleep during response """ # connect exception + if inspect.isclass(status) and issubclass(status, Exception): + raise status('FakeStatus Error') if isinstance(status, (Exception, eventlet.Timeout)): raise status if isinstance(status, tuple): @@ -1063,6 +1065,15 @@ def make_timestamp_iter(offset=0): for t in itertools.count(int(time.time()) + offset)) +@contextmanager +def mock_timestamp_now(now=None): + if now is None: + now = Timestamp.now() + with mocklib.patch('swift.common.utils.Timestamp.now', + classmethod(lambda c: now)): + yield now + + class Timeout(object): def __init__(self, seconds): self.seconds = seconds @@ -1323,3 +1334,55 @@ def skip_if_no_xattrs(): if not xattr_supported_check(): raise SkipTest('Large xattrs not supported in `%s`. 
Skipping test' % gettempdir()) + + +def unlink_files(paths): + for path in paths: + try: + os.unlink(path) + except OSError as err: + if err.errno != errno.ENOENT: + raise + + +class FakeHTTPResponse(object): + + def __init__(self, resp): + self.resp = resp + + @property + def status(self): + return self.resp.status_int + + @property + def data(self): + return self.resp.body + + +def attach_fake_replication_rpc(rpc, replicate_hook=None, errors=None): + class FakeReplConnection(object): + + def __init__(self, node, partition, hash_, logger): + self.logger = logger + self.node = node + self.partition = partition + self.path = '/%s/%s/%s' % (node['device'], partition, hash_) + self.host = node['replication_ip'] + + def replicate(self, op, *sync_args): + print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args)) + resp = None + if errors and op in errors and errors[op]: + resp = errors[op].pop(0) + if not resp: + replicate_args = self.path.lstrip('/').split('/') + args = [op] + copy.deepcopy(list(sync_args)) + with mock_check_drive(isdir=not rpc.mount_check, + ismount=rpc.mount_check): + swob_response = rpc.dispatch(replicate_args, args) + resp = FakeHTTPResponse(swob_response) + if replicate_hook: + replicate_hook(op, *sync_args) + return resp + + return FakeReplConnection diff --git a/test/unit/account/test_server.py b/test/unit/account/test_server.py index 2c00773441..4a8f58cb05 100644 --- a/test/unit/account/test_server.py +++ b/test/unit/account/test_server.py @@ -404,7 +404,7 @@ class TestAccountController(unittest.TestCase): elif state[0] == 'race': # Save the original db_file attribute value self._saved_db_file = self.db_file - self.db_file += '.doesnotexist' + self._db_file += '.doesnotexist' def initialize(self, *args, **kwargs): if state[0] == 'initial': @@ -413,7 +413,7 @@ class TestAccountController(unittest.TestCase): elif state[0] == 'race': # Restore the original db_file attribute to get the race # behavior - self.db_file = self._saved_db_file + self._db_file = self._saved_db_file return super(InterceptedAcBr, self).initialize(*args, **kwargs) with mock.patch("swift.account.server.AccountBroker", InterceptedAcBr): diff --git a/test/unit/cli/test_info.py b/test/unit/cli/test_info.py index d1ea79cff3..1d5c56e9f4 100644 --- a/test/unit/cli/test_info.py +++ b/test/unit/cli/test_info.py @@ -31,6 +31,7 @@ from swift.cli.info import (print_db_info_metadata, print_ring_locations, parse_get_node_args) from swift.account.server import AccountController from swift.container.server import ContainerController +from swift.container.backend import UNSHARDED, SHARDED from swift.obj.diskfile import write_metadata @@ -103,17 +104,18 @@ class TestCliInfo(TestCliInfoBase): self.assertRaisesMessage(ValueError, 'Info is incomplete', print_db_info_metadata, 'container', {}, {}) - info = dict( - account='acct', - created_at=100.1, - put_timestamp=106.3, - delete_timestamp=107.9, - status_changed_at=108.3, - container_count='3', - object_count='20', - bytes_used='42') - info['hash'] = 'abaddeadbeefcafe' - info['id'] = 'abadf100d0ddba11' + info = { + 'account': 'acct', + 'created_at': 100.1, + 'put_timestamp': 106.3, + 'delete_timestamp': 107.9, + 'status_changed_at': 108.3, + 'container_count': '3', + 'object_count': '20', + 'bytes_used': '42', + 'hash': 'abaddeadbeefcafe', + 'id': 'abadf100d0ddba11', + } md = {'x-account-meta-mydata': ('swift', '0000000000.00000'), 'x-other-something': ('boo', '0000000000.00000')} out = StringIO() @@ -154,7 +156,9 @@ No system metadata found in db file 
reported_object_count='20', reported_bytes_used='42', x_container_foo='bar', - x_container_bar='goo') + x_container_bar='goo', + db_state=UNSHARDED, + is_root=True) info['hash'] = 'abaddeadbeefcafe' info['id'] = 'abadf100d0ddba11' md = {'x-container-sysmeta-mydata': ('swift', '0000000000.00000')} @@ -182,10 +186,88 @@ Metadata: X-Container-Bar: goo X-Container-Foo: bar System Metadata: {'mydata': 'swift'} -No user metadata found in db file''' % POLICIES[0].name +No user metadata found in db file +Sharding Metadata: + Type: root + State: unsharded''' % POLICIES[0].name self.assertEqual(sorted(out.getvalue().strip().split('\n')), sorted(exp_out.split('\n'))) + def test_print_db_info_metadata_with_shard_ranges(self): + + shard_ranges = [utils.ShardRange( + name='.sharded_a/shard_range_%s' % i, + timestamp=utils.Timestamp(i), lower='%da' % i, + upper='%dz' % i, object_count=i, bytes_used=i, + meta_timestamp=utils.Timestamp(i)) for i in range(1, 4)] + shard_ranges[0].state = utils.ShardRange.CLEAVED + shard_ranges[1].state = utils.ShardRange.CREATED + + info = dict( + account='acct', + container='cont', + storage_policy_index=0, + created_at='0000000100.10000', + put_timestamp='0000000106.30000', + delete_timestamp='0000000107.90000', + status_changed_at='0000000108.30000', + object_count='20', + bytes_used='42', + reported_put_timestamp='0000010106.30000', + reported_delete_timestamp='0000010107.90000', + reported_object_count='20', + reported_bytes_used='42', + db_state=SHARDED, + is_root=True, + shard_ranges=shard_ranges) + info['hash'] = 'abaddeadbeefcafe' + info['id'] = 'abadf100d0ddba11' + out = StringIO() + with mock.patch('sys.stdout', out): + print_db_info_metadata('container', info, {}) + exp_out = '''Path: /acct/cont + Account: acct + Container: cont + Container Hash: d49d0ecbb53be1fcc49624f2f7c7ccae +Metadata: + Created at: 1970-01-01T00:01:40.100000 (0000000100.10000) + Put Timestamp: 1970-01-01T00:01:46.300000 (0000000106.30000) + Delete Timestamp: 1970-01-01T00:01:47.900000 (0000000107.90000) + Status Timestamp: 1970-01-01T00:01:48.300000 (0000000108.30000) + Object Count: 20 + Bytes Used: 42 + Storage Policy: %s (0) + Reported Put Timestamp: 1970-01-01T02:48:26.300000 (0000010106.30000) + Reported Delete Timestamp: 1970-01-01T02:48:27.900000 (0000010107.90000) + Reported Object Count: 20 + Reported Bytes Used: 42 + Chexor: abaddeadbeefcafe + UUID: abadf100d0ddba11 +No system metadata found in db file +No user metadata found in db file +Sharding Metadata: + Type: root + State: sharded +Shard Ranges (3): + Name: .sharded_a/shard_range_1 + lower: '1a', upper: '1z' + Object Count: 1, Bytes Used: 1, State: cleaved (30) + Created at: 1970-01-01T00:00:01.000000 (0000000001.00000) + Meta Timestamp: 1970-01-01T00:00:01.000000 (0000000001.00000) + Name: .sharded_a/shard_range_2 + lower: '2a', upper: '2z' + Object Count: 2, Bytes Used: 2, State: created (20) + Created at: 1970-01-01T00:00:02.000000 (0000000002.00000) + Meta Timestamp: 1970-01-01T00:00:02.000000 (0000000002.00000) + Name: .sharded_a/shard_range_3 + lower: '3a', upper: '3z' + Object Count: 3, Bytes Used: 3, State: found (10) + Created at: 1970-01-01T00:00:03.000000 (0000000003.00000) + Meta Timestamp: 1970-01-01T00:00:03.000000 (0000000003.00000)''' %\ + POLICIES[0].name + self.assertEqual(sorted(out.getvalue().strip().split('\n')), + sorted(exp_out.strip().split('\n'))) + def test_print_ring_locations_invalid_args(self): self.assertRaises(ValueError, print_ring_locations, None, 'dir', 'acct') @@ -423,14 +505,8 @@ No user 
metadata found in db file''' % POLICIES[0].name '1', 'b47', 'dc5be2aa4347a22a0fee6bc7de505b47', 'dc5be2aa4347a22a0fee6bc7de505b47.db') - try: - print_info('account', db_file, swift_dir=self.testdir) - except Exception: - exp_raised = True - if exp_raised: - self.fail("Unexpected exception raised") - else: - self.assertGreater(len(out.getvalue().strip()), 800) + print_info('account', db_file, swift_dir=self.testdir) + self.assertGreater(len(out.getvalue().strip()), 800) controller = ContainerController( {'devices': self.testdir, 'mount_check': 'false'}) diff --git a/test/unit/cli/test_manage_shard_ranges.py b/test/unit/cli/test_manage_shard_ranges.py new file mode 100644 index 0000000000..8cefa5b19c --- /dev/null +++ b/test/unit/cli/test_manage_shard_ranges.py @@ -0,0 +1,362 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy +# of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from __future__ import unicode_literals + +import json +import os +import unittest +import mock +from shutil import rmtree +from tempfile import mkdtemp + +from six.moves import cStringIO as StringIO + +from swift.cli.manage_shard_ranges import main +from swift.common import utils +from swift.common.utils import Timestamp, ShardRange +from swift.container.backend import ContainerBroker +from test.unit import mock_timestamp_now + + +class TestManageShardRanges(unittest.TestCase): + def setUp(self): + self.testdir = os.path.join(mkdtemp(), 'tmp_test_cli_find_shards') + utils.mkdirs(self.testdir) + rmtree(self.testdir) + self.shard_data = [ + {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10}, + {'index': 1, 'lower': 'obj09', 'upper': 'obj19', + 'object_count': 10}, + {'index': 2, 'lower': 'obj19', 'upper': 'obj29', + 'object_count': 10}, + {'index': 3, 'lower': 'obj29', 'upper': 'obj39', + 'object_count': 10}, + {'index': 4, 'lower': 'obj39', 'upper': 'obj49', + 'object_count': 10}, + {'index': 5, 'lower': 'obj49', 'upper': 'obj59', + 'object_count': 10}, + {'index': 6, 'lower': 'obj59', 'upper': 'obj69', + 'object_count': 10}, + {'index': 7, 'lower': 'obj69', 'upper': 'obj79', + 'object_count': 10}, + {'index': 8, 'lower': 'obj79', 'upper': 'obj89', + 'object_count': 10}, + {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10}, + ] + + def tearDown(self): + rmtree(os.path.dirname(self.testdir)) + + def assert_starts_with(self, value, prefix): + self.assertTrue(value.startswith(prefix), + "%r does not start with %r" % (value, prefix)) + + def assert_formatted_json(self, output, expected): + try: + loaded = json.loads(output) + except ValueError as err: + self.fail('Invalid JSON: %s\n%r' % (err, output)) + # Check this one first, for a prettier diff + self.assertEqual(loaded, expected) + formatted = json.dumps(expected, sort_keys=True, indent=2) + '\n' + self.assertEqual(output, formatted) + + def _make_broker(self, account='a', container='c', + device='sda', part=0): + datadir = os.path.join( + self.testdir, device, 'containers', str(part), 'ash', 'hash') + db_file = os.path.join(datadir, 'hash.db') + broker = ContainerBroker( + db_file, 
account=account, container=container) + broker.initialize() + return broker + + def test_find_shard_ranges(self): + db_file = os.path.join(self.testdir, 'hash.db') + broker = ContainerBroker(db_file) + broker.account = 'a' + broker.container = 'c' + broker.initialize() + ts = utils.Timestamp.now() + broker.merge_items([ + {'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0, + 'content_type': 'application/octet-stream', 'etag': 'not-really', + 'deleted': 0, 'storage_policy_index': 0, + 'ctype_timestamp': ts.internal, 'meta_timestamp': ts.internal} + for i in range(100)]) + + # Default uses a large enough value that sharding isn't required + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find']) + self.assert_formatted_json(out.getvalue(), []) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 0 ranges in ') + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find', '100']) + self.assert_formatted_json(out.getvalue(), []) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 0 ranges in ') + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find', '99']) + self.assert_formatted_json(out.getvalue(), [ + {'index': 0, 'lower': '', 'upper': 'obj98', 'object_count': 99}, + {'index': 1, 'lower': 'obj98', 'upper': '', 'object_count': 1}, + ]) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 2 ranges in ') + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find', '10']) + self.assert_formatted_json(out.getvalue(), [ + {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10}, + {'index': 1, 'lower': 'obj09', 'upper': 'obj19', + 'object_count': 10}, + {'index': 2, 'lower': 'obj19', 'upper': 'obj29', + 'object_count': 10}, + {'index': 3, 'lower': 'obj29', 'upper': 'obj39', + 'object_count': 10}, + {'index': 4, 'lower': 'obj39', 'upper': 'obj49', + 'object_count': 10}, + {'index': 5, 'lower': 'obj49', 'upper': 'obj59', + 'object_count': 10}, + {'index': 6, 'lower': 'obj59', 'upper': 'obj69', + 'object_count': 10}, + {'index': 7, 'lower': 'obj69', 'upper': 'obj79', + 'object_count': 10}, + {'index': 8, 'lower': 'obj79', 'upper': 'obj89', + 'object_count': 10}, + {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10}, + ]) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 10 ranges in ') + + def test_info(self): + broker = self._make_broker() + broker.update_metadata({'X-Container-Sysmeta-Sharding': + (True, Timestamp.now().internal)}) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'info']) + expected = ['Sharding enabled = True', + 'Own shard range: None', + 'db_state = unsharded', + 'Metadata:', + ' X-Container-Sysmeta-Sharding = True'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + retiring_db_id = 
broker.get_info()['id'] + broker.merge_shard_ranges(ShardRange('.shards/cc', Timestamp.now())) + epoch = Timestamp.now() + with mock_timestamp_now(epoch) as now: + broker.enable_sharding(epoch) + self.assertTrue(broker.set_sharding_state()) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now(now): + main([broker.db_file, 'info']) + expected = ['Sharding enabled = True', + 'Own shard range: {', + ' "bytes_used": 0, ', + ' "deleted": 0, ', + ' "epoch": "%s", ' % epoch.internal, + ' "lower": "", ', + ' "meta_timestamp": "%s", ' % now.internal, + ' "name": "a/c", ', + ' "object_count": 0, ', + ' "state": "sharding", ', + ' "state_timestamp": "%s", ' % now.internal, + ' "timestamp": "%s", ' % now.internal, + ' "upper": ""', + '}', + 'db_state = sharding', + 'Retiring db id: %s' % retiring_db_id, + 'Cleaving context: {', + ' "cleave_to_row": null, ', + ' "cleaving_done": false, ', + ' "cursor": "", ', + ' "last_cleave_to_row": null, ', + ' "max_row": -1, ', + ' "misplaced_done": false, ', + ' "ranges_done": 0, ', + ' "ranges_todo": 0, ', + ' "ref": "%s"' % retiring_db_id, + '}', + 'Metadata:', + ' X-Container-Sysmeta-Sharding = True'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + self.assertTrue(broker.set_sharded_state()) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now(now): + main([broker.db_file, 'info']) + expected = ['Sharding enabled = True', + 'Own shard range: {', + ' "bytes_used": 0, ', + ' "deleted": 0, ', + ' "epoch": "%s", ' % epoch.internal, + ' "lower": "", ', + ' "meta_timestamp": "%s", ' % now.internal, + ' "name": "a/c", ', + ' "object_count": 0, ', + ' "state": "sharding", ', + ' "state_timestamp": "%s", ' % now.internal, + ' "timestamp": "%s", ' % now.internal, + ' "upper": ""', + '}', + 'db_state = sharded', + 'Metadata:', + ' X-Container-Sysmeta-Sharding = True'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + def test_replace(self): + broker = self._make_broker() + broker.update_metadata({'X-Container-Sysmeta-Sharding': + (True, Timestamp.now().internal)}) + input_file = os.path.join(self.testdir, 'shards') + with open(input_file, 'wb') as fd: + json.dump(self.shard_data, fd) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'replace', input_file]) + expected = [ + 'No shard ranges found to delete.', + 'Injected 10 shard ranges.', + 'Run container-replicator to replicate them to other nodes.', + 'Use the enable sub-command to enable sharding.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self.assertEqual( + [(data['lower'], data['upper']) for data in self.shard_data], + [(sr.lower_str, sr.upper_str) for sr in broker.get_shard_ranges()]) + + def _assert_enabled(self, broker, epoch): + own_sr = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_sr.state) + self.assertEqual(epoch, own_sr.epoch) + self.assertEqual(ShardRange.MIN, own_sr.lower) + self.assertEqual(ShardRange.MAX, own_sr.upper) + self.assertEqual( + 'True', broker.metadata['X-Container-Sysmeta-Sharding'][0]) + + def test_enable(self): + broker = self._make_broker() + 
broker.update_metadata({'X-Container-Sysmeta-Sharding': + (True, Timestamp.now().internal)}) + # no shard ranges + out = StringIO() + err = StringIO() + with self.assertRaises(SystemExit): + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'enable']) + expected = ["WARNING: invalid shard ranges: ['No shard ranges.'].", + 'Aborting.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + # success + shard_ranges = [] + for data in self.shard_data: + path = ShardRange.make_path( + '.shards_a', 'c', 'c', Timestamp.now(), data['index']) + shard_ranges.append( + ShardRange(path, Timestamp.now(), data['lower'], + data['upper'], data['object_count'])) + broker.merge_shard_ranges(shard_ranges) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now() as now: + main([broker.db_file, 'enable']) + expected = [ + "Container moved to state 'sharding' with epoch %s." % + now.internal, + 'Run container-sharder on all nodes to shard the container.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self._assert_enabled(broker, now) + + # already enabled + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'enable']) + expected = [ + "Container already in state 'sharding' with epoch %s." % + now.internal, + 'No action required.', + 'Run container-sharder on all nodes to shard the container.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self._assert_enabled(broker, now) + + def test_find_replace_enable(self): + db_file = os.path.join(self.testdir, 'hash.db') + broker = ContainerBroker(db_file) + broker.account = 'a' + broker.container = 'c' + broker.initialize() + ts = utils.Timestamp.now() + broker.merge_items([ + {'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0, + 'content_type': 'application/octet-stream', 'etag': 'not-really', + 'deleted': 0, 'storage_policy_index': 0, + 'ctype_timestamp': ts.internal, 'meta_timestamp': ts.internal} + for i in range(100)]) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now() as now: + main([broker.db_file, 'find_and_replace', '10', '--enable']) + expected = [ + 'No shard ranges found to delete.', + 'Injected 10 shard ranges.', + 'Run container-replicator to replicate them to other nodes.', + "Container moved to state 'sharding' with epoch %s." 
% + now.internal, + 'Run container-sharder on all nodes to shard the container.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self._assert_enabled(broker, now) + self.assertEqual( + [(data['lower'], data['upper']) for data in self.shard_data], + [(sr.lower_str, sr.upper_str) for sr in broker.get_shard_ranges()]) diff --git a/test/unit/common/test_db.py b/test/unit/common/test_db.py index f605d0acba..6f723e13a7 100644 --- a/test/unit/common/test_db.py +++ b/test/unit/common/test_db.py @@ -38,7 +38,7 @@ from swift.common.constraints import \ MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE from swift.common.db import chexor, dict_factory, get_db_connection, \ DatabaseBroker, DatabaseConnectionError, DatabaseAlreadyExists, \ - GreenDBConnection, PICKLE_PROTOCOL + GreenDBConnection, PICKLE_PROTOCOL, zero_like from swift.common.utils import normalize_timestamp, mkdirs, Timestamp from swift.common.exceptions import LockTimeout from swift.common.swob import HTTPException @@ -46,6 +46,30 @@ from swift.common.swob import HTTPException from test.unit import with_tempdir +class TestHelperFunctions(unittest.TestCase): + + def test_zero_like(self): + expectations = { + # value => expected + None: True, + True: False, + '': True, + 'asdf': False, + 0: True, + 1: False, + '0': True, + '1': False, + } + errors = [] + for value, expected in expectations.items(): + rv = zero_like(value) + if rv != expected: + errors.append('zero_like(%r) => %r expected %r' % ( + value, rv, expected)) + if errors: + self.fail('Some unexpected return values:\n' + '\n'.join(errors)) + + class TestDatabaseConnectionError(unittest.TestCase): def test_str(self): @@ -989,6 +1013,19 @@ class TestDatabaseBroker(unittest.TestCase): self.assertEqual(broker.get_sync(uuid3), 2) broker.merge_syncs([{'sync_point': 5, 'remote_id': uuid2}]) self.assertEqual(broker.get_sync(uuid2), 5) + # max sync point sticks + broker.merge_syncs([{'sync_point': 5, 'remote_id': uuid2}]) + self.assertEqual(broker.get_sync(uuid2), 5) + self.assertEqual(broker.get_sync(uuid3), 2) + broker.merge_syncs([{'sync_point': 4, 'remote_id': uuid2}]) + self.assertEqual(broker.get_sync(uuid2), 5) + self.assertEqual(broker.get_sync(uuid3), 2) + broker.merge_syncs([{'sync_point': -1, 'remote_id': uuid2}, + {'sync_point': 3, 'remote_id': uuid3}]) + self.assertEqual(broker.get_sync(uuid2), 5) + self.assertEqual(broker.get_sync(uuid3), 3) + self.assertEqual(broker.get_sync(uuid2, incoming=False), 3) + self.assertEqual(broker.get_sync(uuid3, incoming=False), 4) def test_get_replication_info(self): self.get_replication_info_tester(metadata=False) @@ -1089,11 +1126,9 @@ class TestDatabaseBroker(unittest.TestCase): 'max_row': 1, 'id': broker_uuid, 'metadata': broker_metadata}) return broker - def test_metadata(self): - def reclaim(broker, timestamp): - with broker.get() as conn: - broker._reclaim(conn, timestamp) - conn.commit() + # only testing _reclaim_metadata here + @patch.object(DatabaseBroker, '_reclaim') + def test_metadata(self, mock_reclaim): # Initializes a good broker for us broker = self.get_replication_info_tester(metadata=True) # Add our first item @@ -1134,7 +1169,7 @@ class TestDatabaseBroker(unittest.TestCase): self.assertEqual(broker.metadata['Second'], [second_value, second_timestamp]) # Reclaim at point before second item was deleted - reclaim(broker, normalize_timestamp(3)) + broker.reclaim(normalize_timestamp(3), normalize_timestamp(3)) 
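# Illustrative sketch (not the patch's implementation; the helper name below
# is made up for illustration). The updated assertions here call the new
# two-argument broker.reclaim(age_timestamp, sync_timestamp). Going only by
# what this test asserts, the first argument decides whether an empty-valued
# ("deleted") metadata item is purged, while the second must not purge it
# early. A minimal model of that rule, assuming metadata maps
# name -> (value, timestamp):
def reclaim_metadata_sketch(metadata, age_timestamp):
    """Drop empty-valued metadata items with a timestamp older than age_timestamp."""
    return {name: (value, ts) for name, (value, ts) in metadata.items()
            if value or ts >= age_timestamp}
# e.g. an item deleted at t=4 survives age_timestamp 3 or 4 and is dropped at
# age_timestamp 5, matching the sequence of reclaim() calls in this test.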
self.assertIn('First', broker.metadata) self.assertEqual(broker.metadata['First'], [first_value, first_timestamp]) @@ -1142,7 +1177,7 @@ class TestDatabaseBroker(unittest.TestCase): self.assertEqual(broker.metadata['Second'], [second_value, second_timestamp]) # Reclaim at point second item was deleted - reclaim(broker, normalize_timestamp(4)) + broker.reclaim(normalize_timestamp(4), normalize_timestamp(4)) self.assertIn('First', broker.metadata) self.assertEqual(broker.metadata['First'], [first_value, first_timestamp]) @@ -1150,11 +1185,18 @@ class TestDatabaseBroker(unittest.TestCase): self.assertEqual(broker.metadata['Second'], [second_value, second_timestamp]) # Reclaim after point second item was deleted - reclaim(broker, normalize_timestamp(5)) + broker.reclaim(normalize_timestamp(5), normalize_timestamp(5)) self.assertIn('First', broker.metadata) self.assertEqual(broker.metadata['First'], [first_value, first_timestamp]) self.assertNotIn('Second', broker.metadata) + # Delete first item (by setting to empty string) + first_timestamp = normalize_timestamp(6) + broker.update_metadata({'First': ['', first_timestamp]}) + self.assertIn('First', broker.metadata) + # Check that sync_timestamp doesn't cause item to be reclaimed + broker.reclaim(normalize_timestamp(5), normalize_timestamp(99)) + self.assertIn('First', broker.metadata) def test_update_metadata_missing_container_info(self): # Test missing container_info/container_stat row @@ -1197,7 +1239,7 @@ class TestDatabaseBroker(unittest.TestCase): exc = None try: with broker.get() as conn: - broker._reclaim(conn, 0) + broker._reclaim_metadata(conn, 0) except Exception as err: exc = err self.assertEqual( @@ -1333,5 +1375,141 @@ class TestDatabaseBroker(unittest.TestCase): else: self.fail('Expected an exception to be raised') + def test_skip_commits(self): + broker = DatabaseBroker(':memory:') + self.assertTrue(broker._skip_commit_puts()) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + self.assertTrue(broker._skip_commit_puts()) + + # not initialized + db_file = os.path.join(self.testdir, '1.db') + broker = DatabaseBroker(db_file) + self.assertFalse(os.path.exists(broker.db_file)) # sanity check + self.assertTrue(broker._skip_commit_puts()) + + # no pending file + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + self.assertTrue(os.path.exists(broker.db_file)) # sanity check + self.assertFalse(os.path.exists(broker.pending_file)) # sanity check + self.assertTrue(broker._skip_commit_puts()) + + # pending file exists + with open(broker.pending_file, 'wb'): + pass + self.assertTrue(os.path.exists(broker.pending_file)) # sanity check + self.assertFalse(broker._skip_commit_puts()) + + # skip_commits is True + broker.skip_commits = True + self.assertTrue(broker._skip_commit_puts()) + + # re-init + broker = DatabaseBroker(db_file) + self.assertFalse(broker._skip_commit_puts()) + + # constructor can override + broker = DatabaseBroker(db_file, skip_commits=True) + self.assertTrue(broker._skip_commit_puts()) + + def test_commit_puts(self): + db_file = os.path.join(self.testdir, '1.db') + broker = DatabaseBroker(db_file) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + with open(broker.pending_file, 'wb'): + pass + + # merge given list + with patch.object(broker, 'merge_items') as mock_merge_items: + broker._commit_puts(['test']) + mock_merge_items.assert_called_once_with(['test']) + + # load file and merge + with open(broker.pending_file, 'wb') as fd: + fd.write(':1:2:99') + with 
patch.object(broker, 'merge_items') as mock_merge_items: + broker._commit_puts_load = lambda l, e: l.append(e) + broker._commit_puts() + mock_merge_items.assert_called_once_with(['1', '2', '99']) + self.assertEqual(0, os.path.getsize(broker.pending_file)) + + # load file and merge with given list + with open(broker.pending_file, 'wb') as fd: + fd.write(':bad') + with patch.object(broker, 'merge_items') as mock_merge_items: + broker._commit_puts_load = lambda l, e: l.append(e) + broker._commit_puts(['not']) + mock_merge_items.assert_called_once_with(['not', 'bad']) + self.assertEqual(0, os.path.getsize(broker.pending_file)) + + # skip_commits True - no merge + db_file = os.path.join(self.testdir, '2.db') + broker = DatabaseBroker(db_file, skip_commits=True) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + with open(broker.pending_file, 'wb') as fd: + fd.write(':ignored') + with patch.object(broker, 'merge_items') as mock_merge_items: + with self.assertRaises(DatabaseConnectionError) as cm: + broker._commit_puts(['hmmm']) + mock_merge_items.assert_not_called() + self.assertIn('commits not accepted', str(cm.exception)) + with open(broker.pending_file, 'rb') as fd: + self.assertEqual(':ignored', fd.read()) + + def test_put_record(self): + db_file = os.path.join(self.testdir, '1.db') + broker = DatabaseBroker(db_file) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + + # pending file created and record written + broker.make_tuple_for_pickle = lambda x: x.upper() + with patch.object(broker, '_commit_puts') as mock_commit_puts: + broker.put_record('pinky') + mock_commit_puts.assert_not_called() + with open(broker.pending_file, 'rb') as fd: + pending = fd.read() + items = pending.split(':') + self.assertEqual(['PINKY'], + [pickle.loads(i.decode('base64')) for i in items[1:]]) + + # record appended + with patch.object(broker, '_commit_puts') as mock_commit_puts: + broker.put_record('perky') + mock_commit_puts.assert_not_called() + with open(broker.pending_file, 'rb') as fd: + pending = fd.read() + items = pending.split(':') + self.assertEqual(['PINKY', 'PERKY'], + [pickle.loads(i.decode('base64')) for i in items[1:]]) + + # pending file above cap + cap = swift.common.db.PENDING_CAP + while os.path.getsize(broker.pending_file) < cap: + with open(broker.pending_file, 'ab') as fd: + fd.write('x' * 100000) + with patch.object(broker, '_commit_puts') as mock_commit_puts: + broker.put_record('direct') + mock_commit_puts.called_once_with(['direct']) + + # records shouldn't be put to brokers with skip_commits True because + # they cannot be accepted if the pending file is full + broker.skip_commits = True + with open(broker.pending_file, 'wb'): + # empty the pending file + pass + with patch.object(broker, '_commit_puts') as mock_commit_puts: + with self.assertRaises(DatabaseConnectionError) as cm: + broker.put_record('unwelcome') + self.assertIn('commits not accepted', str(cm.exception)) + mock_commit_puts.assert_not_called() + with open(broker.pending_file, 'rb') as fd: + pending = fd.read() + self.assertFalse(pending) + + if __name__ == '__main__': unittest.main() diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 7c4143d641..21eedb9b7d 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -16,6 +16,8 @@ from __future__ import print_function import unittest from contextlib import contextmanager + +import eventlet import os import logging import errno @@ -37,6 
+39,7 @@ from swift.common.exceptions import DriveNotMounted from swift.common.swob import HTTPException from test import unit +from test.unit import FakeLogger, attach_fake_replication_rpc from test.unit.common.test_db import ExampleBroker @@ -160,6 +163,11 @@ class ReplHttp(object): self.set_status = set_status replicated = False host = 'localhost' + node = { + 'ip': '127.0.0.1', + 'port': '6000', + 'device': 'sdb', + } def replicate(self, *args): self.replicated = True @@ -230,11 +238,27 @@ class FakeBroker(object): 'put_timestamp': 1, 'created_at': 1, 'count': 0, + 'max_row': 99, + 'id': 'ID', + 'metadata': {} }) if self.stub_replication_info: info.update(self.stub_replication_info) return info + def get_max_row(self, table=None): + return self.get_replication_info()['max_row'] + + def is_reclaimable(self, now, reclaim_age): + info = self.get_replication_info() + return info['count'] == 0 and ( + (now - reclaim_age) > + info['delete_timestamp'] > + info['put_timestamp']) + + def get_other_replication_items(self): + return None + def reclaim(self, item_timestamp, sync_timestamp): pass @@ -249,6 +273,9 @@ class FakeBroker(object): self.put_timestamp = put_timestamp self.delete_timestamp = delete_timestamp + def get_brokers(self): + return [self] + class FakeAccountBroker(FakeBroker): db_type = 'account' @@ -273,6 +300,7 @@ class TestDBReplicator(unittest.TestCase): self.recon_cache = mkdtemp() rmtree(self.recon_cache, ignore_errors=1) os.mkdir(self.recon_cache) + self.logger = unit.debug_logger('test-replicator') def tearDown(self): for patcher in self._patchers: @@ -287,6 +315,7 @@ class TestDBReplicator(unittest.TestCase): def stub_delete_db(self, broker): self.delete_db_calls.append('/path/to/file') + return True def test_creation(self): # later config should be extended to assert more config options @@ -647,11 +676,107 @@ class TestDBReplicator(unittest.TestCase): }) def test_replicate_object(self): + # verify return values from replicate_object db_replicator.ring = FakeRingWithNodes() - replicator = TestReplicator({}) - replicator.delete_db = self.stub_delete_db - replicator._replicate_object('0', '/path/to/file', 'node_id') - self.assertEqual([], self.delete_db_calls) + db_path = '/path/to/file' + replicator = TestReplicator({}, logger=FakeLogger()) + info = FakeBroker().get_replication_info() + # make remote appear to be in sync + rinfo = {'point': info['max_row'], 'id': 'remote_id'} + + class FakeResponse(object): + def __init__(self, status, rinfo): + self._status = status + self.data = json.dumps(rinfo) + + @property + def status(self): + if isinstance(self._status, (Exception, eventlet.Timeout)): + raise self._status + return self._status + + # all requests fail + replicate = 'swift.common.db_replicator.ReplConnection.replicate' + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(500, None), + FakeResponse(500, None), + FakeResponse(500, None)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [False, False, False]), res) + self.assertEqual(0, mock_delete.call_count) + self.assertFalse(replicator.logger.get_lines_for_level('error')) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(Exception('ugh'), None), + 
FakeResponse(eventlet.Timeout(), None), + FakeResponse(200, rinfo)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [False, False, True]), res) + self.assertEqual(0, mock_delete.call_count) + lines = replicator.logger.get_lines_for_level('error') + self.assertIn('ERROR syncing', lines[0]) + self.assertIn('ERROR syncing', lines[1]) + self.assertFalse(lines[2:]) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + # partial success + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(200, rinfo), + FakeResponse(200, rinfo), + FakeResponse(500, None)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [True, True, False]), res) + self.assertEqual(0, mock_delete.call_count) + self.assertFalse(replicator.logger.get_lines_for_level('error')) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + # 507 triggers additional requests + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(200, rinfo), + FakeResponse(200, rinfo), + FakeResponse(507, None), + FakeResponse(507, None), + FakeResponse(200, rinfo)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [True, True, False, False, True]), res) + self.assertEqual(0, mock_delete.call_count) + lines = replicator.logger.get_lines_for_level('error') + self.assertIn('Remote drive not mounted', lines[0]) + self.assertIn('Remote drive not mounted', lines[1]) + self.assertFalse(lines[2:]) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + # all requests succeed; node id == 'node_id' causes node to be + # considered a handoff so expect the db to be deleted + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(200, rinfo), + FakeResponse(200, rinfo), + FakeResponse(200, rinfo)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((True, [True, True, True]), res) + self.assertEqual(1, mock_delete.call_count) + self.assertFalse(replicator.logger.get_lines_for_level('error')) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) def test_replicate_object_quarantine(self): replicator = TestReplicator({}) @@ -695,8 +820,122 @@ class TestDBReplicator(unittest.TestCase): replicator.brokerclass = FakeAccountBroker replicator._repl_to_node = lambda *args: True replicator.delete_db = self.stub_delete_db - replicator._replicate_object('0', '/path/to/file', 'node_id') + orig_cleanup = replicator.cleanup_post_replicate + with mock.patch.object(replicator, 'cleanup_post_replicate', + side_effect=orig_cleanup) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3) + self.assertIsInstance(mock_cleanup.call_args[0][0], + 
replicator.brokerclass) self.assertEqual(['/path/to/file'], self.delete_db_calls) + self.assertEqual(0, replicator.stats['failure']) + + def test_replicate_object_delete_delegated_to_cleanup_post_replicate(self): + replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + replicator._repl_to_node = lambda *args: True + replicator.delete_db = self.stub_delete_db + + # cleanup succeeds + with mock.patch.object(replicator, 'cleanup_post_replicate', + return_value=True) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3) + self.assertIsInstance(mock_cleanup.call_args[0][0], + replicator.brokerclass) + self.assertFalse(self.delete_db_calls) + self.assertEqual(0, replicator.stats['failure']) + self.assertEqual(3, replicator.stats['success']) + + # cleanup fails + replicator._zero_stats() + with mock.patch.object(replicator, 'cleanup_post_replicate', + return_value=False) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3) + self.assertIsInstance(mock_cleanup.call_args[0][0], + replicator.brokerclass) + self.assertFalse(self.delete_db_calls) + self.assertEqual(3, replicator.stats['failure']) + self.assertEqual(0, replicator.stats['success']) + + # shouldbehere True - cleanup not required + replicator._zero_stats() + primary_node_id = replicator.ring.get_part_nodes('0')[0]['id'] + with mock.patch.object(replicator, 'cleanup_post_replicate', + return_value=True) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', primary_node_id) + mock_cleanup.assert_not_called() + self.assertFalse(self.delete_db_calls) + self.assertEqual(0, replicator.stats['failure']) + self.assertEqual(2, replicator.stats['success']) + + def test_cleanup_post_replicate(self): + replicator = TestReplicator({}, logger=self.logger) + replicator.ring = FakeRingWithNodes().Ring('path') + broker = FakeBroker() + replicator._repl_to_node = lambda *args: True + info = broker.get_replication_info() + + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [False] * 3) + mock_delete_db.assert_not_called() + self.assertTrue(res) + self.assertEqual(['Not deleting db %s (0/3 success)' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True, False, True]) + mock_delete_db.assert_not_called() + self.assertTrue(res) + self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + broker.stub_replication_info = {'max_row': 101} + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_not_called() + self.assertTrue(res) + self.assertEqual(['Not deleting db %s (2 new rows)' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + broker.stub_replication_info = {'max_row': 98} + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_not_called() + self.assertTrue(res) + 
broker.stub_replication_info = None + self.assertEqual(['Not deleting db %s (negative max_row_delta: -1)' % + broker.db_file], + replicator.logger.get_lines_for_level('error')) + replicator.logger.clear() + + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_called_once_with(broker) + self.assertTrue(res) + self.assertEqual(['Successfully deleted db %s' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + with mock.patch.object(replicator, 'delete_db', + return_value=False) as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_called_once_with(broker) + self.assertFalse(res) + self.assertEqual(['Failed to delete db %s' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() def test_replicate_object_with_exception(self): replicator = TestReplicator({}) @@ -949,6 +1188,8 @@ class TestDBReplicator(unittest.TestCase): response = rpc.dispatch(('drive', 'part', 'hash'), ['rsync_then_merge', 'arg1', 'arg2']) expected_calls = [call('/part/ash/hash/hash.db'), + call('/drive/tmp/arg1'), + call(FakeBroker.db_file), call('/drive/tmp/arg1')] self.assertEqual(mock_os.path.exists.call_args_list, expected_calls) @@ -966,7 +1207,7 @@ class TestDBReplicator(unittest.TestCase): unit.mock_check_drive(isdir=True): mock_os.path.exists.side_effect = [False, True] response = rpc.dispatch(('drive', 'part', 'hash'), - ['complete_rsync', 'arg1', 'arg2']) + ['complete_rsync', 'arg1']) expected_calls = [call('/part/ash/hash/hash.db'), call('/drive/tmp/arg1')] self.assertEqual(mock_os.path.exists.call_args_list, @@ -974,6 +1215,19 @@ class TestDBReplicator(unittest.TestCase): self.assertEqual('204 No Content', response.status) self.assertEqual(204, response.status_int) + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.side_effect = [False, True] + response = rpc.dispatch(('drive', 'part', 'hash'), + ['complete_rsync', 'arg1', 'arg2']) + expected_calls = [call('/part/ash/hash/arg2'), + call('/drive/tmp/arg1')] + self.assertEqual(mock_os.path.exists.call_args_list, + expected_calls) + self.assertEqual('204 No Content', response.status) + self.assertEqual(204, response.status_int) + def test_rsync_then_merge_db_does_not_exist(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, mount_check=False) @@ -1010,7 +1264,8 @@ class TestDBReplicator(unittest.TestCase): def mock_renamer(old, new): self.assertEqual('/drive/tmp/arg1', old) - self.assertEqual('/data/db.db', new) + # FakeBroker uses module filename as db_file! 
+ self.assertEqual(__file__, new) self._patch(patch.object, db_replicator, 'renamer', mock_renamer) @@ -1023,17 +1278,26 @@ class TestDBReplicator(unittest.TestCase): self.assertEqual('204 No Content', response.status) self.assertEqual(204, response.status_int) - def test_complete_rsync_db_does_not_exist(self): + def test_complete_rsync_db_exists(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, mount_check=False) + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.return_value = True + response = rpc.complete_rsync('drive', '/data/db.db', ['arg1']) + mock_os.path.exists.assert_called_with('/data/db.db') + self.assertEqual('404 Not Found', response.status) + self.assertEqual(404, response.status_int) + with patch('swift.common.db_replicator.os', new=mock.MagicMock(wraps=os)) as mock_os, \ unit.mock_check_drive(isdir=True): mock_os.path.exists.return_value = True response = rpc.complete_rsync('drive', '/data/db.db', ['arg1', 'arg2']) - mock_os.path.exists.assert_called_with('/data/db.db') + mock_os.path.exists.assert_called_with('/data/arg2') self.assertEqual('404 Not Found', response.status) self.assertEqual(404, response.status_int) @@ -1046,37 +1310,57 @@ class TestDBReplicator(unittest.TestCase): unit.mock_check_drive(isdir=True): mock_os.path.exists.return_value = False response = rpc.complete_rsync('drive', '/data/db.db', - ['arg1', 'arg2']) + ['arg1']) expected_calls = [call('/data/db.db'), call('/drive/tmp/arg1')] self.assertEqual(expected_calls, mock_os.path.exists.call_args_list) self.assertEqual('404 Not Found', response.status) self.assertEqual(404, response.status_int) + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.return_value = False + response = rpc.complete_rsync('drive', '/data/db.db', + ['arg1', 'arg2']) + expected_calls = [call('/data/arg2'), call('/drive/tmp/arg1')] + self.assertEqual(expected_calls, + mock_os.path.exists.call_args_list) + self.assertEqual('404 Not Found', response.status) + self.assertEqual(404, response.status_int) + def test_complete_rsync_rename(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, mount_check=False) - def mock_exists(path): - if path == '/data/db.db': - return False - self.assertEqual('/drive/tmp/arg1', path) - return True - def mock_renamer(old, new): - self.assertEqual('/drive/tmp/arg1', old) - self.assertEqual('/data/db.db', new) + renamer_calls.append((old, new)) self._patch(patch.object, db_replicator, 'renamer', mock_renamer) + renamer_calls = [] + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.side_effect = [False, True] + response = rpc.complete_rsync('drive', '/data/db.db', + ['arg1']) + self.assertEqual('204 No Content', response.status) + self.assertEqual(204, response.status_int) + self.assertEqual(('/drive/tmp/arg1', '/data/db.db'), renamer_calls[0]) + self.assertFalse(renamer_calls[1:]) + + renamer_calls = [] with patch('swift.common.db_replicator.os', new=mock.MagicMock(wraps=os)) as mock_os, \ unit.mock_check_drive(isdir=True): mock_os.path.exists.side_effect = [False, True] response = rpc.complete_rsync('drive', '/data/db.db', ['arg1', 'arg2']) - self.assertEqual('204 No Content', response.status) - self.assertEqual(204, response.status_int) + self.assertEqual('204 No Content', response.status) 
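# Illustrative sketch (not the patch's implementation; the helper name below
# is made up for illustration). Taken together, the one-argument and
# two-argument assertions in these complete_rsync tests imply the RPC now
# accepts an optional extra argument naming the final db file: with one
# argument the existing db path is reused, with two the rename target is that
# filename in the same directory.
import os

def complete_rsync_target_sketch(db_path, args):
    """Return the path the rsynced tmp file would be renamed to."""
    if len(args) > 1:
        return os.path.join(os.path.dirname(db_path), args[1])
    return db_path
# complete_rsync_target_sketch('/data/db.db', ['arg1'])         -> '/data/db.db'
# complete_rsync_target_sketch('/data/db.db', ['arg1', 'arg2']) -> '/data/arg2'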
+ self.assertEqual(204, response.status_int) + self.assertEqual(('/drive/tmp/arg1', '/data/arg2'), renamer_calls[0]) + self.assertFalse(renamer_calls[1:]) def test_replicator_sync_with_broker_replication_missing_table(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, @@ -1435,10 +1719,10 @@ class TestDBReplicator(unittest.TestCase): db_file = __file__ replicator = TestReplicator({}) replicator._http_connect(node, partition, db_file) + expected_hsh = os.path.basename(db_file).split('.', 1)[0] + expected_hsh = expected_hsh.split('_', 1)[0] db_replicator.ReplConnection.assert_has_calls([ - mock.call(node, partition, - os.path.basename(db_file).split('.', 1)[0], - replicator.logger)]) + mock.call(node, partition, expected_hsh, replicator.logger)]) class TestHandoffsOnly(unittest.TestCase): @@ -1740,7 +2024,7 @@ class TestReplToNode(unittest.TestCase): def test_repl_to_node_300_status(self): self.http = ReplHttp('{"id": 3, "point": -1}', set_status=300) - self.assertIsNone(self.replicator._repl_to_node( + self.assertFalse(self.replicator._repl_to_node( self.fake_node, FakeBroker(), '0', self.fake_info)) def test_repl_to_node_not_response(self): @@ -1769,45 +2053,6 @@ class TestReplToNode(unittest.TestCase): ]) -class FakeHTTPResponse(object): - - def __init__(self, resp): - self.resp = resp - - @property - def status(self): - return self.resp.status_int - - @property - def data(self): - return self.resp.body - - -def attach_fake_replication_rpc(rpc, replicate_hook=None): - class FakeReplConnection(object): - - def __init__(self, node, partition, hash_, logger): - self.logger = logger - self.node = node - self.partition = partition - self.path = '/%s/%s/%s' % (node['device'], partition, hash_) - self.host = node['replication_ip'] - - def replicate(self, op, *sync_args): - print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args)) - replicate_args = self.path.lstrip('/').split('/') - args = [op] + list(sync_args) - with unit.mock_check_drive(isdir=not rpc.mount_check, - ismount=rpc.mount_check): - swob_response = rpc.dispatch(replicate_args, args) - resp = FakeHTTPResponse(swob_response) - if replicate_hook: - replicate_hook(op, *sync_args) - return resp - - return FakeReplConnection - - class ExampleReplicator(db_replicator.Replicator): server_type = 'fake' brokerclass = ExampleBroker @@ -1872,15 +2117,19 @@ class TestReplicatorSync(unittest.TestCase): conf.update(conf_updates) return self.replicator_daemon(conf, logger=self.logger) - def _run_once(self, node, conf_updates=None, daemon=None): - daemon = daemon or self._get_daemon(node, conf_updates) - + def _install_fake_rsync_file(self, daemon, captured_calls=None): def _rsync_file(db_file, remote_file, **kwargs): + if captured_calls is not None: + captured_calls.append((db_file, remote_file, kwargs)) remote_server, remote_path = remote_file.split('/', 1) dest_path = os.path.join(self.root, remote_path) copy(db_file, dest_path) return True daemon._rsync_file = _rsync_file + + def _run_once(self, node, conf_updates=None, daemon=None): + daemon = daemon or self._get_daemon(node, conf_updates) + self._install_fake_rsync_file(daemon) with mock.patch('swift.common.db_replicator.whataremyips', new=lambda *a, **kw: [node['replication_ip']]), \ unit.mock_check_drive(isdir=not daemon.mount_check, diff --git a/test/unit/common/test_direct_client.py b/test/unit/common/test_direct_client.py index a832f31c6f..fc2dffc696 100644 --- a/test/unit/common/test_direct_client.py +++ b/test/unit/common/test_direct_client.py @@ -95,6 +95,11 @@ def 
mocked_http_conn(*args, **kwargs): yield fake_conn +@contextmanager +def noop_timeout(duration): + yield + + @patch_policies class TestDirectClient(unittest.TestCase): @@ -117,6 +122,10 @@ class TestDirectClient(unittest.TestCase): self.account, self.container, self.obj)) self.user_agent = 'direct-client %s' % os.getpid() + patcher = mock.patch.object(direct_client, 'Timeout', noop_timeout) + patcher.start() + self.addCleanup(patcher.stop) + def test_gen_headers(self): stub_user_agent = 'direct-client %s' % os.getpid() @@ -450,6 +459,67 @@ class TestDirectClient(unittest.TestCase): self.assertEqual(err.http_status, 500) self.assertTrue('DELETE' in str(err)) + def test_direct_put_container(self): + body = 'Let us begin with a quick introduction' + headers = {'x-foo': 'bar', 'Content-Length': str(len(body)), + 'Content-Type': 'application/json', + 'User-Agent': 'my UA'} + + with mocked_http_conn(204) as conn: + rv = direct_client.direct_put_container( + self.node, self.part, self.account, self.container, + contents=body, headers=headers) + self.assertEqual(conn.host, self.node['ip']) + self.assertEqual(conn.port, self.node['port']) + self.assertEqual(conn.method, 'PUT') + self.assertEqual(conn.path, self.container_path) + self.assertEqual(conn.req_headers['Content-Length'], + str(len(body))) + self.assertEqual(conn.req_headers['Content-Type'], + 'application/json') + self.assertEqual(conn.req_headers['User-Agent'], 'my UA') + self.assertTrue('x-timestamp' in conn.req_headers) + self.assertEqual('bar', conn.req_headers.get('x-foo')) + self.assertEqual(md5(body).hexdigest(), conn.etag.hexdigest()) + self.assertIsNone(rv) + + def test_direct_put_container_chunked(self): + body = 'Let us begin with a quick introduction' + headers = {'x-foo': 'bar', 'Content-Type': 'application/json'} + + with mocked_http_conn(204) as conn: + rv = direct_client.direct_put_container( + self.node, self.part, self.account, self.container, + contents=body, headers=headers) + self.assertEqual(conn.host, self.node['ip']) + self.assertEqual(conn.port, self.node['port']) + self.assertEqual(conn.method, 'PUT') + self.assertEqual(conn.path, self.container_path) + self.assertEqual(conn.req_headers['Transfer-Encoding'], 'chunked') + self.assertEqual(conn.req_headers['Content-Type'], + 'application/json') + self.assertTrue('x-timestamp' in conn.req_headers) + self.assertEqual('bar', conn.req_headers.get('x-foo')) + self.assertNotIn('Content-Length', conn.req_headers) + expected_sent = '%0x\r\n%s\r\n0\r\n\r\n' % (len(body), body) + self.assertEqual(md5(expected_sent).hexdigest(), + conn.etag.hexdigest()) + self.assertIsNone(rv) + + def test_direct_put_container_fail(self): + with mock.patch('swift.common.bufferedhttp.http_connect_raw', + side_effect=Exception('conn failed')): + with self.assertRaises(Exception) as cm: + direct_client.direct_put_container( + self.node, self.part, self.account, self.container) + self.assertEqual('conn failed', str(cm.exception)) + + with mocked_http_conn(Exception('resp failed')): + with self.assertRaises(Exception) as cm: + direct_client.direct_put_container( + self.node, self.part, self.account, self.container) + self.assertEqual('resp failed', str(cm.exception)) + def test_direct_put_container_object(self): headers = {'x-foo': 'bar'} diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index b9caaabf34..7abad33ec2 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -15,7 +15,11 @@ """Tests for swift.common.utils""" from __future__ import 
print_function -from test.unit import temptree, debug_logger, make_timestamp_iter, with_tempdir + +import hashlib + +from test.unit import temptree, debug_logger, make_timestamp_iter, \ + with_tempdir, mock_timestamp_now import ctypes import contextlib @@ -1454,6 +1458,15 @@ class TestUtils(unittest.TestCase): with open(testcache_file) as fd: file_dict = json.loads(fd.readline()) self.assertEqual(expect_dict, file_dict) + # nested dict items are not sticky + submit_dict = {'key1': {'key2': {'value3': 3}}} + expect_dict = {'key0': 101, + 'key1': {'key2': {'value3': 3}, + 'value1': 1, 'value2': 2}} + utils.dump_recon_cache(submit_dict, testcache_file, logger) + with open(testcache_file) as fd: + file_dict = json.loads(fd.readline()) + self.assertEqual(expect_dict, file_dict) # cached entries are sticky submit_dict = {} utils.dump_recon_cache(submit_dict, testcache_file, logger) @@ -2753,6 +2766,53 @@ cluster_dfw1 = http://dfw1.host/v1/ else: self.assertEqual(expected, rv) + def test_config_float_value(self): + for args, expected in ( + ((99, None, None), 99.0), + ((99.01, None, None), 99.01), + (('99', None, None), 99.0), + (('99.01', None, None), 99.01), + ((99, 99, None), 99.0), + ((99.01, 99.01, None), 99.01), + (('99', 99, None), 99.0), + (('99.01', 99.01, None), 99.01), + ((99, None, 99), 99.0), + ((99.01, None, 99.01), 99.01), + (('99', None, 99), 99.0), + (('99.01', None, 99.01), 99.01), + ((-99, -99, -99), -99.0), + ((-99.01, -99.01, -99.01), -99.01), + (('-99', -99, -99), -99.0), + (('-99.01', -99.01, -99.01), -99.01),): + actual = utils.config_float_value(*args) + self.assertEqual(expected, actual) + + for val, minimum in ((99, 100), + ('99', 100), + (-99, -98), + ('-98.01', -98)): + with self.assertRaises(ValueError) as cm: + utils.config_float_value(val, minimum=minimum) + self.assertIn('greater than %s' % minimum, cm.exception.args[0]) + self.assertNotIn('less than', cm.exception.args[0]) + + for val, maximum in ((99, 98), + ('99', 98), + (-99, -100), + ('-97.9', -98)): + with self.assertRaises(ValueError) as cm: + utils.config_float_value(val, maximum=maximum) + self.assertIn('less than %s' % maximum, cm.exception.args[0]) + self.assertNotIn('greater than', cm.exception.args[0]) + + for val, minimum, maximum in ((99, 99, 98), + ('99', 100, 100), + (99, 98, 98),): + with self.assertRaises(ValueError) as cm: + utils.config_float_value(val, minimum=minimum, maximum=maximum) + self.assertIn('greater than %s' % minimum, cm.exception.args[0]) + self.assertIn('less than %s' % maximum, cm.exception.args[0]) + def test_config_auto_int_value(self): expectations = { # (value, default) : expected, @@ -3807,6 +3867,105 @@ cluster_dfw1 = http://dfw1.host/v1/ if tempdir: shutil.rmtree(tempdir) + def test_find_shard_range(self): + ts = utils.Timestamp.now().internal + start = utils.ShardRange('a/-a', ts, '', 'a') + atof = utils.ShardRange('a/a-f', ts, 'a', 'f') + ftol = utils.ShardRange('a/f-l', ts, 'f', 'l') + ltor = utils.ShardRange('a/l-r', ts, 'l', 'r') + rtoz = utils.ShardRange('a/r-z', ts, 'r', 'z') + end = utils.ShardRange('a/z-', ts, 'z', '') + ranges = [start, atof, ftol, ltor, rtoz, end] + + found = utils.find_shard_range('', ranges) + self.assertEqual(found, None) + found = utils.find_shard_range(' ', ranges) + self.assertEqual(found, start) + found = utils.find_shard_range(' ', ranges[1:]) + self.assertEqual(found, None) + found = utils.find_shard_range('b', ranges) + self.assertEqual(found, atof) + found = utils.find_shard_range('f', ranges) + self.assertEqual(found, atof) + found 
= utils.find_shard_range('f\x00', ranges) + self.assertEqual(found, ftol) + found = utils.find_shard_range('x', ranges) + self.assertEqual(found, rtoz) + found = utils.find_shard_range('r', ranges) + self.assertEqual(found, ltor) + found = utils.find_shard_range('}', ranges) + self.assertEqual(found, end) + found = utils.find_shard_range('}', ranges[:-1]) + self.assertEqual(found, None) + # remove l-r from list of ranges and try and find a shard range for an + # item in that range. + found = utils.find_shard_range('p', ranges[:-3] + ranges[-2:]) + self.assertEqual(found, None) + + # add some sub-shards; a sub-shard's state is less than its parent + # while the parent is undeleted, so insert these ahead of the + # overlapping parent in the list of ranges + ftoh = utils.ShardRange('a/f-h', ts, 'f', 'h') + htok = utils.ShardRange('a/h-k', ts, 'h', 'k') + + overlapping_ranges = ranges[:2] + [ftoh, htok] + ranges[2:] + found = utils.find_shard_range('g', overlapping_ranges) + self.assertEqual(found, ftoh) + found = utils.find_shard_range('h', overlapping_ranges) + self.assertEqual(found, ftoh) + found = utils.find_shard_range('k', overlapping_ranges) + self.assertEqual(found, htok) + found = utils.find_shard_range('l', overlapping_ranges) + self.assertEqual(found, ftol) + found = utils.find_shard_range('m', overlapping_ranges) + self.assertEqual(found, ltor) + + ktol = utils.ShardRange('a/k-l', ts, 'k', 'l') + overlapping_ranges = ranges[:2] + [ftoh, htok, ktol] + ranges[2:] + found = utils.find_shard_range('l', overlapping_ranges) + self.assertEqual(found, ktol) + + def test_parse_db_filename(self): + actual = utils.parse_db_filename('hash.db') + self.assertEqual(('hash', None, '.db'), actual) + actual = utils.parse_db_filename('hash_1234567890.12345.db') + self.assertEqual(('hash', '1234567890.12345', '.db'), actual) + actual = utils.parse_db_filename( + '/dev/containers/part/ash/hash/hash_1234567890.12345.db') + self.assertEqual(('hash', '1234567890.12345', '.db'), actual) + self.assertRaises(ValueError, utils.parse_db_filename, '/path/to/dir/') + # These shouldn't come up in practice; included for completeness + self.assertEqual(utils.parse_db_filename('hashunder_.db'), + ('hashunder', '', '.db')) + self.assertEqual(utils.parse_db_filename('lots_of_underscores.db'), + ('lots', 'of', '.db')) + + def test_make_db_file_path(self): + epoch = utils.Timestamp.now() + actual = utils.make_db_file_path('hash.db', epoch) + self.assertEqual('hash_%s.db' % epoch.internal, actual) + + actual = utils.make_db_file_path('hash_oldepoch.db', epoch) + self.assertEqual('hash_%s.db' % epoch.internal, actual) + + actual = utils.make_db_file_path('/path/to/hash.db', epoch) + self.assertEqual('/path/to/hash_%s.db' % epoch.internal, actual) + + epoch = utils.Timestamp.now() + actual = utils.make_db_file_path(actual, epoch) + self.assertEqual('/path/to/hash_%s.db' % epoch.internal, actual) + + # epochs shouldn't have offsets + epoch = utils.Timestamp.now(offset=10) + actual = utils.make_db_file_path(actual, epoch) + self.assertEqual('/path/to/hash_%s.db' % epoch.normal, actual) + + self.assertRaises(ValueError, utils.make_db_file_path, + '/path/to/hash.db', 'bad epoch') + + self.assertRaises(ValueError, utils.make_db_file_path, + '/path/to/hash.db', None) + def test_modify_priority(self): pid = os.getpid() logger = debug_logger() @@ -4097,6 +4256,115 @@ cluster_dfw1 = http://dfw1.host/v1/ # iterators self.assertListEqual([1, 4, 6, 2, 5, 7, 3, 8, 9], got) + @with_tempdir + def test_get_db_files(self, tempdir): + 
dbdir = os.path.join(tempdir, 'dbdir') + self.assertEqual([], utils.get_db_files(dbdir)) + path_1 = os.path.join(dbdir, 'dbfile.db') + self.assertEqual([], utils.get_db_files(path_1)) + os.mkdir(dbdir) + self.assertEqual([], utils.get_db_files(path_1)) + with open(path_1, 'wb'): + pass + self.assertEqual([path_1], utils.get_db_files(path_1)) + + path_2 = os.path.join(dbdir, 'dbfile_2.db') + self.assertEqual([path_1], utils.get_db_files(path_2)) + + with open(path_2, 'wb'): + pass + + self.assertEqual([path_1, path_2], utils.get_db_files(path_1)) + self.assertEqual([path_1, path_2], utils.get_db_files(path_2)) + + path_3 = os.path.join(dbdir, 'dbfile_3.db') + self.assertEqual([path_1, path_2], utils.get_db_files(path_3)) + + with open(path_3, 'wb'): + pass + + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_1)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_2)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_3)) + + other_hash = os.path.join(dbdir, 'other.db') + self.assertEqual([], utils.get_db_files(other_hash)) + other_hash = os.path.join(dbdir, 'other_1.db') + self.assertEqual([], utils.get_db_files(other_hash)) + + pending = os.path.join(dbdir, 'dbfile.pending') + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(pending)) + + with open(pending, 'wb'): + pass + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(pending)) + + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_1)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_2)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_3)) + self.assertEqual([], utils.get_db_files(dbdir)) + + os.unlink(path_1) + self.assertEqual([path_2, path_3], utils.get_db_files(path_1)) + self.assertEqual([path_2, path_3], utils.get_db_files(path_2)) + self.assertEqual([path_2, path_3], utils.get_db_files(path_3)) + + os.unlink(path_2) + self.assertEqual([path_3], utils.get_db_files(path_1)) + self.assertEqual([path_3], utils.get_db_files(path_2)) + self.assertEqual([path_3], utils.get_db_files(path_3)) + + os.unlink(path_3) + self.assertEqual([], utils.get_db_files(path_1)) + self.assertEqual([], utils.get_db_files(path_2)) + self.assertEqual([], utils.get_db_files(path_3)) + self.assertEqual([], utils.get_db_files('/path/to/nowhere')) + + def test_get_redirect_data(self): + ts_now = utils.Timestamp.now() + headers = {'X-Backend-Redirect-Timestamp': ts_now.internal} + response = FakeResponse(200, headers, '') + self.assertIsNone(utils.get_redirect_data(response)) + + headers = {'Location': '/a/c/o', + 'X-Backend-Redirect-Timestamp': ts_now.internal} + response = FakeResponse(200, headers, '') + path, ts = utils.get_redirect_data(response) + self.assertEqual('a/c', path) + self.assertEqual(ts_now, ts) + + headers = {'Location': '/a/c', + 'X-Backend-Redirect-Timestamp': ts_now.internal} + response = FakeResponse(200, headers, '') + path, ts = utils.get_redirect_data(response) + self.assertEqual('a/c', path) + self.assertEqual(ts_now, ts) + + def do_test(headers): + response = FakeResponse(200, headers, '') + with self.assertRaises(ValueError) as cm: + utils.get_redirect_data(response) + return cm.exception + + exc = do_test({'Location': '/a', + 'X-Backend-Redirect-Timestamp': ts_now.internal}) + self.assertIn('Invalid path', str(exc)) + + exc = do_test({'Location': '', + 'X-Backend-Redirect-Timestamp': ts_now.internal}) + self.assertIn('Invalid path', str(exc)) + + exc = do_test({'Location': '/a/c', + 
'X-Backend-Redirect-Timestamp': 'bad'}) + self.assertIn('Invalid timestamp', str(exc)) + + exc = do_test({'Location': '/a/c'}) + self.assertIn('Invalid timestamp', str(exc)) + + exc = do_test({'Location': '/a/c', + 'X-Backend-Redirect-Timestamp': '-1'}) + self.assertIn('Invalid timestamp', str(exc)) + class ResellerConfReader(unittest.TestCase): @@ -6656,5 +6924,828 @@ class TestDistributeEvenly(unittest.TestCase): self.assertEqual(out, [[0], [1], [2], [3], [4], [], []]) +class TestShardRange(unittest.TestCase): + def setUp(self): + self.ts_iter = make_timestamp_iter() + + def test_min_max_bounds(self): + # max + self.assertEqual(utils.ShardRange.MAX, utils.ShardRange.MAX) + self.assertFalse(utils.ShardRange.MAX > utils.ShardRange.MAX) + self.assertFalse(utils.ShardRange.MAX < utils.ShardRange.MAX) + + for val in 'z', u'\u00e4': + self.assertFalse(utils.ShardRange.MAX == val) + self.assertFalse(val > utils.ShardRange.MAX) + self.assertTrue(val < utils.ShardRange.MAX) + self.assertTrue(utils.ShardRange.MAX > val) + self.assertFalse(utils.ShardRange.MAX < val) + + self.assertEqual('', str(utils.ShardRange.MAX)) + self.assertFalse(utils.ShardRange.MAX) + self.assertTrue(utils.ShardRange.MAX == utils.ShardRange.MAX) + self.assertFalse(utils.ShardRange.MAX != utils.ShardRange.MAX) + self.assertTrue( + utils.ShardRange.MaxBound() == utils.ShardRange.MaxBound()) + self.assertFalse( + utils.ShardRange.MaxBound() != utils.ShardRange.MaxBound()) + + # min + self.assertEqual(utils.ShardRange.MIN, utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN > utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN < utils.ShardRange.MIN) + + for val in 'z', u'\u00e4': + self.assertFalse(utils.ShardRange.MIN == val) + self.assertFalse(val < utils.ShardRange.MIN) + self.assertTrue(val > utils.ShardRange.MIN) + self.assertTrue(utils.ShardRange.MIN < val) + self.assertFalse(utils.ShardRange.MIN > val) + self.assertFalse(utils.ShardRange.MIN) + + self.assertEqual('', str(utils.ShardRange.MIN)) + self.assertFalse(utils.ShardRange.MIN) + self.assertTrue(utils.ShardRange.MIN == utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN != utils.ShardRange.MIN) + self.assertTrue( + utils.ShardRange.MinBound() == utils.ShardRange.MinBound()) + self.assertFalse( + utils.ShardRange.MinBound() != utils.ShardRange.MinBound()) + + self.assertFalse(utils.ShardRange.MAX == utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN == utils.ShardRange.MAX) + self.assertTrue(utils.ShardRange.MAX != utils.ShardRange.MIN) + self.assertTrue(utils.ShardRange.MIN != utils.ShardRange.MAX) + + self.assertEqual(utils.ShardRange.MAX, + max(utils.ShardRange.MIN, utils.ShardRange.MAX)) + self.assertEqual(utils.ShardRange.MIN, + min(utils.ShardRange.MIN, utils.ShardRange.MAX)) + + def test_shard_range_initialisation(self): + def assert_initialisation_ok(params, expected): + pr = utils.ShardRange(**params) + self.assertDictEqual(dict(pr), expected) + + def assert_initialisation_fails(params, err_type=ValueError): + with self.assertRaises(err_type): + utils.ShardRange(**params) + + ts_1 = next(self.ts_iter) + ts_2 = next(self.ts_iter) + ts_3 = next(self.ts_iter) + ts_4 = next(self.ts_iter) + empty_run = dict(name=None, timestamp=None, lower=None, + upper=None, object_count=0, bytes_used=0, + meta_timestamp=None, deleted=0, + state=utils.ShardRange.FOUND, state_timestamp=None, + epoch=None) + # name, timestamp must be given + assert_initialisation_fails(empty_run.copy()) + assert_initialisation_fails(dict(empty_run, 
name='a/c'), TypeError) + assert_initialisation_fails(dict(empty_run, timestamp=ts_1)) + # name must be form a/c + assert_initialisation_fails(dict(empty_run, name='c', timestamp=ts_1)) + assert_initialisation_fails(dict(empty_run, name='', timestamp=ts_1)) + assert_initialisation_fails(dict(empty_run, name='/a/c', + timestamp=ts_1)) + assert_initialisation_fails(dict(empty_run, name='/c', + timestamp=ts_1)) + # lower, upper can be None + expect = dict(name='a/c', timestamp=ts_1.internal, lower='', + upper='', object_count=0, bytes_used=0, + meta_timestamp=ts_1.internal, deleted=0, + state=utils.ShardRange.FOUND, + state_timestamp=ts_1.internal, epoch=None) + assert_initialisation_ok(dict(empty_run, name='a/c', timestamp=ts_1), + expect) + assert_initialisation_ok(dict(name='a/c', timestamp=ts_1), expect) + + good_run = dict(name='a/c', timestamp=ts_1, lower='l', + upper='u', object_count=2, bytes_used=10, + meta_timestamp=ts_2, deleted=0, + state=utils.ShardRange.CREATED, + state_timestamp=ts_3.internal, epoch=ts_4) + expect.update({'lower': 'l', 'upper': 'u', 'object_count': 2, + 'bytes_used': 10, 'meta_timestamp': ts_2.internal, + 'state': utils.ShardRange.CREATED, + 'state_timestamp': ts_3.internal, 'epoch': ts_4}) + assert_initialisation_ok(good_run.copy(), expect) + + # obj count and bytes used as int strings + good_str_run = good_run.copy() + good_str_run.update({'object_count': '2', 'bytes_used': '10'}) + assert_initialisation_ok(good_str_run, expect) + + good_no_meta = good_run.copy() + good_no_meta.pop('meta_timestamp') + assert_initialisation_ok(good_no_meta, + dict(expect, meta_timestamp=ts_1.internal)) + + good_deleted = good_run.copy() + good_deleted['deleted'] = 1 + assert_initialisation_ok(good_deleted, + dict(expect, deleted=1)) + + assert_initialisation_fails(dict(good_run, timestamp='water balloon')) + + assert_initialisation_fails( + dict(good_run, meta_timestamp='water balloon')) + + assert_initialisation_fails(dict(good_run, lower='water balloon')) + + assert_initialisation_fails(dict(good_run, upper='balloon')) + + assert_initialisation_fails( + dict(good_run, object_count='water balloon')) + + assert_initialisation_fails(dict(good_run, bytes_used='water ballon')) + + assert_initialisation_fails(dict(good_run, object_count=-1)) + + assert_initialisation_fails(dict(good_run, bytes_used=-1)) + assert_initialisation_fails(dict(good_run, state=-1)) + assert_initialisation_fails(dict(good_run, state_timestamp='not a ts')) + assert_initialisation_fails(dict(good_run, name='/a/c')) + assert_initialisation_fails(dict(good_run, name='/a/c/')) + assert_initialisation_fails(dict(good_run, name='a/c/')) + assert_initialisation_fails(dict(good_run, name='a')) + assert_initialisation_fails(dict(good_run, name='')) + + def _check_to_from_dict(self, lower, upper): + ts_1 = next(self.ts_iter) + ts_2 = next(self.ts_iter) + ts_3 = next(self.ts_iter) + ts_4 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, lower, upper, 10, 100, ts_2, + state=None, state_timestamp=ts_3, epoch=ts_4) + sr_dict = dict(sr) + expected = { + 'name': 'a/test', 'timestamp': ts_1.internal, 'lower': lower, + 'upper': upper, 'object_count': 10, 'bytes_used': 100, + 'meta_timestamp': ts_2.internal, 'deleted': 0, + 'state': utils.ShardRange.FOUND, 'state_timestamp': ts_3.internal, + 'epoch': ts_4} + self.assertEqual(expected, sr_dict) + self.assertIsInstance(sr_dict['lower'], six.string_types) + self.assertIsInstance(sr_dict['upper'], six.string_types) + sr_new = utils.ShardRange.from_dict(sr_dict) + 
self.assertEqual(sr, sr_new) + self.assertEqual(sr_dict, dict(sr_new)) + + sr_new = utils.ShardRange(**sr_dict) + self.assertEqual(sr, sr_new) + self.assertEqual(sr_dict, dict(sr_new)) + + for key in sr_dict: + bad_dict = dict(sr_dict) + bad_dict.pop(key) + with self.assertRaises(KeyError): + utils.ShardRange.from_dict(bad_dict) + # But __init__ still (generally) works! + if key not in ('name', 'timestamp'): + utils.ShardRange(**bad_dict) + else: + with self.assertRaises(TypeError): + utils.ShardRange(**bad_dict) + + def test_to_from_dict(self): + self._check_to_from_dict('l', 'u') + self._check_to_from_dict('', '') + + def test_timestamp_setter(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + self.assertEqual(ts_1, sr.timestamp) + + ts_2 = next(self.ts_iter) + sr.timestamp = ts_2 + self.assertEqual(ts_2, sr.timestamp) + + sr.timestamp = 0 + self.assertEqual(utils.Timestamp(0), sr.timestamp) + + with self.assertRaises(TypeError): + sr.timestamp = None + + def test_meta_timestamp_setter(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_1, sr.meta_timestamp) + + ts_2 = next(self.ts_iter) + sr.meta_timestamp = ts_2 + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_2, sr.meta_timestamp) + + ts_3 = next(self.ts_iter) + sr.timestamp = ts_3 + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_2, sr.meta_timestamp) + + # meta_timestamp defaults to tracking timestamp + sr.meta_timestamp = None + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_3, sr.meta_timestamp) + ts_4 = next(self.ts_iter) + sr.timestamp = ts_4 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(ts_4, sr.meta_timestamp) + + sr.meta_timestamp = 0 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(utils.Timestamp(0), sr.meta_timestamp) + + def test_update_meta(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + with mock_timestamp_now(next(self.ts_iter)) as now: + sr.update_meta(9, 99) + self.assertEqual(9, sr.object_count) + self.assertEqual(99, sr.bytes_used) + self.assertEqual(now, sr.meta_timestamp) + + with mock_timestamp_now(next(self.ts_iter)) as now: + sr.update_meta(99, 999, None) + self.assertEqual(99, sr.object_count) + self.assertEqual(999, sr.bytes_used) + self.assertEqual(now, sr.meta_timestamp) + + ts_2 = next(self.ts_iter) + sr.update_meta(21, 2112, ts_2) + self.assertEqual(21, sr.object_count) + self.assertEqual(2112, sr.bytes_used) + self.assertEqual(ts_2, sr.meta_timestamp) + + sr.update_meta('11', '12') + self.assertEqual(11, sr.object_count) + self.assertEqual(12, sr.bytes_used) + + def check_bad_args(*args): + with self.assertRaises(ValueError): + sr.update_meta(*args) + check_bad_args('bad', 10) + check_bad_args(10, 'bad') + check_bad_args(10, 11, 'bad') + + def test_increment_meta(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 1, 2, None) + with mock_timestamp_now(next(self.ts_iter)) as now: + sr.increment_meta(9, 99) + self.assertEqual(10, sr.object_count) + self.assertEqual(101, sr.bytes_used) + self.assertEqual(now, sr.meta_timestamp) + + sr.increment_meta('11', '12') + self.assertEqual(21, sr.object_count) + self.assertEqual(113, sr.bytes_used) + + def check_bad_args(*args): + with self.assertRaises(ValueError): + sr.increment_meta(*args) + check_bad_args('bad', 10) + check_bad_args(10, 'bad') + + def 
test_state_timestamp_setter(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_1, sr.state_timestamp) + + ts_2 = next(self.ts_iter) + sr.state_timestamp = ts_2 + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_2, sr.state_timestamp) + + ts_3 = next(self.ts_iter) + sr.timestamp = ts_3 + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_2, sr.state_timestamp) + + # state_timestamp defaults to tracking timestamp + sr.state_timestamp = None + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_3, sr.state_timestamp) + ts_4 = next(self.ts_iter) + sr.timestamp = ts_4 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(ts_4, sr.state_timestamp) + + sr.state_timestamp = 0 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(utils.Timestamp(0), sr.state_timestamp) + + def test_state_setter(self): + for state in utils.ShardRange.STATES: + for test_value in (state, str(state)): + sr = utils.ShardRange('a/test', next(self.ts_iter), 'l', 'u') + sr.state = test_value + actual = sr.state + self.assertEqual( + state, actual, + 'Expected %s but got %s for %s' % + (state, actual, test_value) + ) + + for bad_state in (max(utils.ShardRange.STATES) + 1, + -1, 99, None, 'stringy', 1.1): + sr = utils.ShardRange('a/test', next(self.ts_iter), 'l', 'u') + with self.assertRaises(ValueError) as cm: + sr.state = bad_state + self.assertIn('Invalid state', str(cm.exception)) + + def test_update_state(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + old_sr = sr.copy() + self.assertEqual(utils.ShardRange.FOUND, sr.state) + self.assertEqual(dict(sr), dict(old_sr)) # sanity check + + for state in utils.ShardRange.STATES: + if state == utils.ShardRange.FOUND: + continue + self.assertTrue(sr.update_state(state)) + self.assertEqual(dict(old_sr, state=state), dict(sr)) + self.assertFalse(sr.update_state(state)) + self.assertEqual(dict(old_sr, state=state), dict(sr)) + + sr = utils.ShardRange('a/c', next(self.ts_iter)) + old_sr = sr.copy() + for state in utils.ShardRange.STATES: + ts = next(self.ts_iter) + self.assertTrue(sr.update_state(state, state_timestamp=ts)) + self.assertEqual(dict(old_sr, state=state, state_timestamp=ts), + dict(sr)) + + def test_resolve_state(self): + for name, number in utils.ShardRange.STATES_BY_NAME.items(): + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(name)) + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(name.upper())) + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(name.title())) + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(number)) + + def check_bad_value(value): + with self.assertRaises(ValueError) as cm: + utils.ShardRange.resolve_state(value) + self.assertIn('Invalid state %r' % value, str(cm.exception)) + + check_bad_value(min(utils.ShardRange.STATES) - 1) + check_bad_value(max(utils.ShardRange.STATES) + 1) + check_bad_value('badstate') + + def test_epoch_setter(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + self.assertIsNone(sr.epoch) + ts = next(self.ts_iter) + sr.epoch = ts + self.assertEqual(ts, sr.epoch) + ts = next(self.ts_iter) + sr.epoch = ts.internal + self.assertEqual(ts, sr.epoch) + sr.epoch = None + self.assertIsNone(sr.epoch) + with self.assertRaises(ValueError): + sr.epoch = 'bad' + + def test_deleted_setter(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + for val in (True, 1): + sr.deleted = val + 
self.assertIs(True, sr.deleted) + for val in (False, 0, None): + sr.deleted = val + self.assertIs(False, sr.deleted) + + def test_set_deleted(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + # initialise other timestamps + sr.update_state(utils.ShardRange.ACTIVE, + state_timestamp=utils.Timestamp.now()) + sr.update_meta(1, 2) + old_sr = sr.copy() + self.assertIs(False, sr.deleted) # sanity check + self.assertEqual(dict(sr), dict(old_sr)) # sanity check + + with mock_timestamp_now(next(self.ts_iter)) as now: + self.assertTrue(sr.set_deleted()) + self.assertEqual(now, sr.timestamp) + self.assertIs(True, sr.deleted) + old_sr_dict = dict(old_sr) + old_sr_dict.pop('deleted') + old_sr_dict.pop('timestamp') + sr_dict = dict(sr) + sr_dict.pop('deleted') + sr_dict.pop('timestamp') + self.assertEqual(old_sr_dict, sr_dict) + + # no change + self.assertFalse(sr.set_deleted()) + self.assertEqual(now, sr.timestamp) + self.assertIs(True, sr.deleted) + + # force timestamp change + with mock_timestamp_now(next(self.ts_iter)) as now: + self.assertTrue(sr.set_deleted(timestamp=now)) + self.assertEqual(now, sr.timestamp) + self.assertIs(True, sr.deleted) + + def test_lower_setter(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', '') + # sanity checks + self.assertEqual('b', sr.lower) + self.assertEqual(sr.MAX, sr.upper) + + def do_test(good_value, expected): + sr.lower = good_value + self.assertEqual(expected, sr.lower) + self.assertEqual(sr.MAX, sr.upper) + + do_test(utils.ShardRange.MIN, utils.ShardRange.MIN) + do_test(utils.ShardRange.MAX, utils.ShardRange.MAX) + do_test('', utils.ShardRange.MIN) + do_test(u'', utils.ShardRange.MIN) + do_test(None, utils.ShardRange.MIN) + do_test('a', 'a') + do_test('y', 'y') + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + sr.lower = '' + self.assertEqual(sr.MIN, sr.lower) + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + with self.assertRaises(ValueError) as cm: + sr.lower = 'z' + self.assertIn("lower ('z') must be less than or equal to upper ('y')", + str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + def do_test(bad_value): + with self.assertRaises(TypeError) as cm: + sr.lower = bad_value + self.assertIn("lower must be a string", str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + do_test(1) + do_test(1.234) + + def test_upper_setter(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', 'y') + # sanity checks + self.assertEqual(sr.MIN, sr.lower) + self.assertEqual('y', sr.upper) + + def do_test(good_value, expected): + sr.upper = good_value + self.assertEqual(expected, sr.upper) + self.assertEqual(sr.MIN, sr.lower) + + do_test(utils.ShardRange.MIN, utils.ShardRange.MIN) + do_test(utils.ShardRange.MAX, utils.ShardRange.MAX) + do_test('', utils.ShardRange.MAX) + do_test(u'', utils.ShardRange.MAX) + do_test(None, utils.ShardRange.MAX) + do_test('z', 'z') + do_test('b', 'b') + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + sr.upper = '' + self.assertEqual(sr.MAX, sr.upper) + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + with self.assertRaises(ValueError) as cm: + sr.upper = 'a' + self.assertIn( + "upper ('a') must be greater than or equal to lower ('b')", + str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + def do_test(bad_value): + with self.assertRaises(TypeError) as cm: + sr.upper = bad_value + self.assertIn("upper must be a string", 
str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + do_test(1) + do_test(1.234) + + def test_end_marker(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', 'y') + self.assertEqual('y\x00', sr.end_marker) + sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', '') + self.assertEqual('', sr.end_marker) + + def test_bounds_serialization(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now()) + self.assertEqual('a/c', sr.name) + self.assertEqual(utils.ShardRange.MIN, sr.lower) + self.assertEqual('', sr.lower_str) + self.assertEqual(utils.ShardRange.MAX, sr.upper) + self.assertEqual('', sr.upper_str) + self.assertEqual('', sr.end_marker) + + lower = u'\u00e4' + upper = u'\u00fb' + sr = utils.ShardRange('a/%s-%s' % (lower, upper), + utils.Timestamp.now(), lower, upper) + if six.PY3: + self.assertEqual(u'\u00e4', sr.lower) + self.assertEqual(u'\u00e4', sr.lower_str) + self.assertEqual(u'\u00fb', sr.upper) + self.assertEqual(u'\u00fb', sr.upper_str) + self.assertEqual(u'\u00fb\x00', sr.end_marker) + else: + self.assertEqual(u'\u00e4'.encode('utf8'), sr.lower) + self.assertEqual(u'\u00e4'.encode('utf8'), sr.lower_str) + self.assertEqual(u'\u00fb'.encode('utf8'), sr.upper) + self.assertEqual(u'\u00fb'.encode('utf8'), sr.upper_str) + self.assertEqual(u'\u00fb\x00'.encode('utf8'), sr.end_marker) + + def test_entire_namespace(self): + # test entire range (no boundaries) + entire = utils.ShardRange('a/test', utils.Timestamp.now()) + self.assertEqual(utils.ShardRange.MAX, entire.upper) + self.assertEqual(utils.ShardRange.MIN, entire.lower) + self.assertIs(True, entire.entire_namespace()) + + for x in range(100): + self.assertTrue(str(x) in entire) + self.assertTrue(chr(x) in entire) + + for x in ('a', 'z', 'zzzz', '124fsdf', u'\u00e4'): + self.assertTrue(x in entire, '%r should be in %r' % (x, entire)) + + entire.lower = 'a' + self.assertIs(False, entire.entire_namespace()) + + def test_comparisons(self): + ts = utils.Timestamp.now().internal + + # upper (if provided) *must* be greater than lower + with self.assertRaises(ValueError): + utils.ShardRange('f-a', ts, 'f', 'a') + + # test basic boundaries + btoc = utils.ShardRange('a/b-c', ts, 'b', 'c') + atof = utils.ShardRange('a/a-f', ts, 'a', 'f') + ftol = utils.ShardRange('a/f-l', ts, 'f', 'l') + ltor = utils.ShardRange('a/l-r', ts, 'l', 'r') + rtoz = utils.ShardRange('a/r-z', ts, 'r', 'z') + lower = utils.ShardRange('a/lower', ts, '', 'mid') + upper = utils.ShardRange('a/upper', ts, 'mid', '') + entire = utils.ShardRange('a/test', utils.Timestamp.now()) + + # overlapping ranges + dtof = utils.ShardRange('a/d-f', ts, 'd', 'f') + dtom = utils.ShardRange('a/d-m', ts, 'd', 'm') + + # test range > and < + # non-adjacent + self.assertFalse(rtoz < atof) + self.assertTrue(atof < ltor) + self.assertTrue(ltor > atof) + self.assertFalse(ftol > rtoz) + + # adjacent + self.assertFalse(rtoz < ltor) + self.assertTrue(ltor < rtoz) + self.assertFalse(ltor > rtoz) + self.assertTrue(rtoz > ltor) + + # wholly within + self.assertFalse(btoc < atof) + self.assertFalse(btoc > atof) + self.assertFalse(atof < btoc) + self.assertFalse(atof > btoc) + + self.assertFalse(atof < dtof) + self.assertFalse(dtof > atof) + self.assertFalse(atof > dtof) + self.assertFalse(dtof < atof) + + self.assertFalse(dtof < dtom) + self.assertFalse(dtof > dtom) + self.assertFalse(dtom > dtof) + self.assertFalse(dtom < dtof) + + # overlaps + self.assertFalse(atof < dtom) + self.assertFalse(atof > dtom) + self.assertFalse(ltor > dtom) + + # 
ranges including min/max bounds + self.assertTrue(upper > lower) + self.assertTrue(lower < upper) + self.assertFalse(upper < lower) + self.assertFalse(lower > upper) + + self.assertFalse(lower < entire) + self.assertFalse(entire > lower) + self.assertFalse(lower > entire) + self.assertFalse(entire < lower) + + self.assertFalse(upper < entire) + self.assertFalse(entire > upper) + self.assertFalse(upper > entire) + self.assertFalse(entire < upper) + + self.assertFalse(entire < entire) + self.assertFalse(entire > entire) + + # test range < and > to an item + # range is > lower and <= upper to lower boundary isn't + # actually included + self.assertTrue(ftol > 'f') + self.assertFalse(atof < 'f') + self.assertTrue(ltor < 'y') + + self.assertFalse(ftol < 'f') + self.assertFalse(atof > 'f') + self.assertFalse(ltor > 'y') + + self.assertTrue('f' < ftol) + self.assertFalse('f' > atof) + self.assertTrue('y' > ltor) + + self.assertFalse('f' > ftol) + self.assertFalse('f' < atof) + self.assertFalse('y' < ltor) + + # Now test ranges with only 1 boundary + start_to_l = utils.ShardRange('a/None-l', ts, '', 'l') + l_to_end = utils.ShardRange('a/l-None', ts, 'l', '') + + for x in ('l', 'm', 'z', 'zzz1231sd'): + if x == 'l': + self.assertFalse(x in l_to_end) + self.assertFalse(start_to_l < x) + self.assertFalse(x > start_to_l) + else: + self.assertTrue(x in l_to_end) + self.assertTrue(start_to_l < x) + self.assertTrue(x > start_to_l) + + # Now test some of the range to range checks with missing boundaries + self.assertFalse(atof < start_to_l) + self.assertFalse(start_to_l < entire) + + # Now test ShardRange.overlaps(other) + self.assertTrue(atof.overlaps(atof)) + self.assertFalse(atof.overlaps(ftol)) + self.assertFalse(ftol.overlaps(atof)) + self.assertTrue(atof.overlaps(dtof)) + self.assertTrue(dtof.overlaps(atof)) + self.assertFalse(dtof.overlaps(ftol)) + self.assertTrue(dtom.overlaps(ftol)) + self.assertTrue(ftol.overlaps(dtom)) + self.assertFalse(start_to_l.overlaps(l_to_end)) + + def test_contains(self): + ts = utils.Timestamp.now().internal + lower = utils.ShardRange('a/-h', ts, '', 'h') + mid = utils.ShardRange('a/h-p', ts, 'h', 'p') + upper = utils.ShardRange('a/p-', ts, 'p', '') + entire = utils.ShardRange('a/all', ts, '', '') + + self.assertTrue('a' in entire) + self.assertTrue('x' in entire) + + # the empty string is not a valid object name, so it cannot be in any + # range + self.assertFalse('' in lower) + self.assertFalse('' in upper) + self.assertFalse('' in entire) + + self.assertTrue('a' in lower) + self.assertTrue('h' in lower) + self.assertFalse('i' in lower) + + self.assertFalse('h' in mid) + self.assertTrue('p' in mid) + + self.assertFalse('p' in upper) + self.assertTrue('x' in upper) + + self.assertIn(utils.ShardRange.MAX, entire) + self.assertNotIn(utils.ShardRange.MAX, lower) + self.assertIn(utils.ShardRange.MAX, upper) + + # lower bound is excluded so MIN cannot be in any range. 
+ self.assertNotIn(utils.ShardRange.MIN, entire) + self.assertNotIn(utils.ShardRange.MIN, upper) + self.assertNotIn(utils.ShardRange.MIN, lower) + + def test_includes(self): + ts = utils.Timestamp.now().internal + _to_h = utils.ShardRange('a/-h', ts, '', 'h') + d_to_t = utils.ShardRange('a/d-t', ts, 'd', 't') + d_to_k = utils.ShardRange('a/d-k', ts, 'd', 'k') + e_to_l = utils.ShardRange('a/e-l', ts, 'e', 'l') + k_to_t = utils.ShardRange('a/k-t', ts, 'k', 't') + p_to_ = utils.ShardRange('a/p-', ts, 'p', '') + t_to_ = utils.ShardRange('a/t-', ts, 't', '') + entire = utils.ShardRange('a/all', ts, '', '') + + self.assertTrue(entire.includes(entire)) + self.assertTrue(d_to_t.includes(d_to_t)) + self.assertTrue(_to_h.includes(_to_h)) + self.assertTrue(p_to_.includes(p_to_)) + + self.assertTrue(entire.includes(_to_h)) + self.assertTrue(entire.includes(d_to_t)) + self.assertTrue(entire.includes(p_to_)) + + self.assertTrue(d_to_t.includes(d_to_k)) + self.assertTrue(d_to_t.includes(e_to_l)) + self.assertTrue(d_to_t.includes(k_to_t)) + self.assertTrue(p_to_.includes(t_to_)) + + self.assertFalse(_to_h.includes(d_to_t)) + self.assertFalse(p_to_.includes(d_to_t)) + self.assertFalse(k_to_t.includes(d_to_k)) + self.assertFalse(d_to_k.includes(e_to_l)) + self.assertFalse(k_to_t.includes(e_to_l)) + self.assertFalse(t_to_.includes(p_to_)) + + self.assertFalse(_to_h.includes(entire)) + self.assertFalse(p_to_.includes(entire)) + self.assertFalse(d_to_t.includes(entire)) + + def test_repr(self): + ts = next(self.ts_iter) + ts.offset = 1234 + meta_ts = next(self.ts_iter) + state_ts = next(self.ts_iter) + sr = utils.ShardRange('a/c', ts, 'l', 'u', 100, 1000, + meta_timestamp=meta_ts, + state=utils.ShardRange.ACTIVE, + state_timestamp=state_ts) + self.assertEqual( + "ShardRange<'l' to 'u' as of %s, (100, 1000) as of %s, " + "active as of %s>" + % (ts.internal, meta_ts.internal, state_ts.internal), str(sr)) + + ts.offset = 0 + meta_ts.offset = 2 + state_ts.offset = 3 + sr = utils.ShardRange('a/c', ts, '', '', 100, 1000, + meta_timestamp=meta_ts, + state=utils.ShardRange.FOUND, + state_timestamp=state_ts) + self.assertEqual( + "ShardRange<MinBound to MaxBound as of %s, (100, 1000) as of %s, " + "found as of %s>" + % (ts.internal, meta_ts.internal, state_ts.internal), str(sr)) + + def test_copy(self): + sr = utils.ShardRange('a/c', next(self.ts_iter), 'x', 'y', 99, 99000, + meta_timestamp=next(self.ts_iter), + state=utils.ShardRange.CREATED, + state_timestamp=next(self.ts_iter)) + new = sr.copy() + self.assertEqual(dict(sr), dict(new)) + + new = sr.copy(deleted=1) + self.assertEqual(dict(sr, deleted=1), dict(new)) + + new_timestamp = next(self.ts_iter) + new = sr.copy(timestamp=new_timestamp) + self.assertEqual(dict(sr, timestamp=new_timestamp.internal, + meta_timestamp=new_timestamp.internal, + state_timestamp=new_timestamp.internal), + dict(new)) + + new = sr.copy(timestamp=new_timestamp, object_count=99) + self.assertEqual(dict(sr, timestamp=new_timestamp.internal, + meta_timestamp=new_timestamp.internal, + state_timestamp=new_timestamp.internal, + object_count=99), + dict(new)) + + def test_make_path(self): + ts = utils.Timestamp.now() + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 0) + parent_hash = hashlib.md5(b'parent').hexdigest() + self.assertEqual('a/root-%s-%s-0' % (parent_hash, ts.internal), actual) + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 3) + self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual) + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, '3') + self.assertEqual('a/root-%s-%s-3' % (parent_hash,
ts.internal), actual) + actual = utils.ShardRange.make_path( + 'a', 'root', 'parent', ts.internal, '3') + self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual) + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 'foo') + self.assertEqual('a/root-%s-%s-foo' % (parent_hash, ts.internal), + actual) + + if __name__ == '__main__': unittest.main() diff --git a/test/unit/common/test_wsgi.py b/test/unit/common/test_wsgi.py index 774fcf84e8..8e88d09b59 100644 --- a/test/unit/common/test_wsgi.py +++ b/test/unit/common/test_wsgi.py @@ -1270,9 +1270,10 @@ class TestWorkersStrategy(unittest.TestCase): pid += 1 sock_count += 1 + mypid = os.getpid() self.assertEqual([ - 'Started child %s' % 88, - 'Started child %s' % 89, + 'Started child %s from parent %s' % (88, mypid), + 'Started child %s from parent %s' % (89, mypid), ], self.logger.get_lines_for_level('notice')) self.assertEqual(2, sock_count) @@ -1282,7 +1283,7 @@ class TestWorkersStrategy(unittest.TestCase): self.strategy.register_worker_exit(88) self.assertEqual([ - 'Removing dead child %s' % 88, + 'Removing dead child %s from parent %s' % (88, mypid) ], self.logger.get_lines_for_level('error')) for s, i in self.strategy.new_worker_socks(): @@ -1294,9 +1295,9 @@ class TestWorkersStrategy(unittest.TestCase): self.assertEqual(1, sock_count) self.assertEqual([ - 'Started child %s' % 88, - 'Started child %s' % 89, - 'Started child %s' % 90, + 'Started child %s from parent %s' % (88, mypid), + 'Started child %s from parent %s' % (89, mypid), + 'Started child %s from parent %s' % (90, mypid), ], self.logger.get_lines_for_level('notice')) def test_post_fork_hook(self): diff --git a/test/unit/container/test_backend.py b/test/unit/container/test_backend.py index 1febf47cfb..79ede02901 100644 --- a/test/unit/container/test_backend.py +++ b/test/unit/container/test_backend.py @@ -14,13 +14,13 @@ # limitations under the License. 
""" Tests for swift.container.backend """ - +import errno import os import hashlib +import inspect import unittest from time import sleep, time from uuid import uuid4 -import itertools import random from collections import defaultdict from contextlib import contextmanager @@ -28,38 +28,69 @@ import sqlite3 import pickle import json +from swift.common.exceptions import LockTimeout from swift.container.backend import ContainerBroker, \ - update_new_item_from_existing -from swift.common.utils import Timestamp, encode_timestamps + update_new_item_from_existing, UNSHARDED, SHARDING, SHARDED, \ + COLLAPSED, SHARD_LISTING_STATES, SHARD_UPDATE_STATES +from swift.common.db import DatabaseAlreadyExists, GreenDBConnection +from swift.common.utils import Timestamp, encode_timestamps, hash_path, \ + ShardRange, make_db_file_path from swift.common.storage_policy import POLICIES import mock +from test import annotate_failure from test.unit import (patch_policies, with_tempdir, make_timestamp_iter, - EMPTY_ETAG) + EMPTY_ETAG, FakeLogger, mock_timestamp_now) from test.unit.common import test_db class TestContainerBroker(unittest.TestCase): """Tests for ContainerBroker""" + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'policy_stat', + 'container_info', 'shard_range'} + + def _assert_shard_ranges(self, broker, expected, include_own=False): + actual = broker.get_shard_ranges(include_deleted=True, + include_own=include_own) + self.assertEqual([dict(sr) for sr in expected], + [dict(sr) for sr in actual]) def test_creation(self): # Test ContainerBroker.__init__ broker = ContainerBroker(':memory:', account='a', container='c') - self.assertEqual(broker.db_file, ':memory:') + self.assertEqual(broker._db_file, ':memory:') broker.initialize(Timestamp('1').internal, 0) with broker.get() as conn: curs = conn.cursor() curs.execute('SELECT 1') self.assertEqual(curs.fetchall()[0][0], 1) + curs.execute("SELECT name FROM sqlite_master WHERE type='table';") + self.assertEqual(self.expected_db_tables, + {row[0] for row in curs.fetchall()}) + # check the update trigger + broker.put_object('blah', Timestamp.now().internal, 0, 'text/plain', + 'etag', 0, 0) + with broker.get() as conn: + with self.assertRaises(sqlite3.DatabaseError) as cm: + conn.execute('UPDATE object SET name="blah";') + self.assertIn('UPDATE not allowed', str(cm.exception)) + if 'shard_range' in self.expected_db_tables: + # check the update trigger + broker.merge_shard_ranges(broker.get_own_shard_range()) + with broker.get() as conn: + with self.assertRaises(sqlite3.DatabaseError) as cm: + conn.execute('UPDATE shard_range SET name="blah";') + self.assertIn('UPDATE not allowed', str(cm.exception)) @patch_policies def test_storage_policy_property(self): - ts = (Timestamp(t).internal for t in itertools.count(int(time()))) + ts = make_timestamp_iter() for policy in POLICIES: broker = ContainerBroker(':memory:', account='a', container='policy_%s' % policy.name) - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) with broker.get() as conn: try: conn.execute('''SELECT storage_policy_index @@ -92,16 +123,296 @@ class TestContainerBroker(unittest.TestCase): pass self.assertTrue(broker.conn is None) - def test_empty(self): + @with_tempdir + def test_is_deleted(self, tempdir): + # Test ContainerBroker.is_deleted() and get_info_is_deleted() + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, 
account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + self.assertFalse(broker.is_deleted()) + broker.delete_db(next(ts_iter).internal) + self.assertTrue(broker.is_deleted()) + + def check_object_counted(broker_to_test, broker_with_object): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker_with_object.merge_items([dict(obj)]) + self.assertFalse(broker_to_test.is_deleted()) + info, deleted = broker_to_test.get_info_is_deleted() + self.assertFalse(deleted) + self.assertEqual(1, info['object_count']) + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker_with_object.merge_items([dict(obj)]) + self.assertTrue(broker_to_test.is_deleted()) + info, deleted = broker_to_test.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + def check_object_not_counted(broker): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker.merge_items([dict(obj)]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker.merge_items([dict(obj)]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + def check_shard_ranges_not_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + def check_shard_ranges_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + expected = state not in counted_states + self.assertEqual(expected, broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertEqual(expected, deleted) + self.assertEqual(0 if expected else 13, info['object_count']) + + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + # unsharded + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # move to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + broker.delete_db(next(ts_iter).internal) + self.assertTrue(broker.is_deleted()) + + # check object in retiring db is considered + check_object_counted(broker, broker.get_brokers()[0]) + self.assertTrue(broker.is_deleted()) + check_shard_ranges_not_counted() + # misplaced object in fresh db is not considered + check_object_not_counted(broker) + + # move to sharded state + 
self.assertTrue(broker.set_sharded_state()) + check_object_not_counted(broker) + check_shard_ranges_counted() + + # own shard range has no influence + own_sr = broker.get_own_shard_range() + own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter)) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.is_deleted()) + + @with_tempdir + def test_empty(self, tempdir): # Test ContainerBroker.empty - broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(Timestamp('1').internal, 0) + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + self.assertTrue(broker.is_root_container()) + + def check_object_counted(broker_to_test, broker_with_object): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker_with_object.merge_items([dict(obj)]) + self.assertFalse(broker_to_test.empty()) + # and delete it + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker_with_object.merge_items([dict(obj)]) + self.assertTrue(broker_to_test.empty()) + + def check_shard_ranges_not_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + # empty other shard ranges do not influence result + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + self.assertTrue(broker.empty()) - broker.put_object('o', Timestamp.now().internal, 0, 'text/plain', - 'd41d8cd98f00b204e9800998ecf8427e') - self.assertTrue(not broker.empty()) - sleep(.00001) - broker.delete_object('o', Timestamp.now().internal) + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # own shard range is not considered for object count + own_sr = broker.get_own_shard_range() + self.assertEqual(0, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.empty()) + + broker.put_object('o', next(ts_iter).internal, 0, 'text/plain', + EMPTY_ETAG) + own_sr = broker.get_own_shard_range() + self.assertEqual(1, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertFalse(broker.empty()) + broker.delete_object('o', next(ts_iter).internal) + self.assertTrue(broker.empty()) + + # have own shard range but in state ACTIVE + self.assertEqual(ShardRange.ACTIVE, own_sr.state) + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + def check_shard_ranges_counted(): + # other shard range is considered + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertEqual(state not in counted_states, broker.empty()) + + # empty other shard ranges do not influence result + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + 
broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + # enable sharding + broker.enable_sharding(next(ts_iter)) + check_object_counted(broker, broker) + check_shard_ranges_counted() + + # move to sharding state + self.assertTrue(broker.set_sharding_state()) + # check object in retiring db is considered + check_object_counted(broker, broker.get_brokers()[0]) + self.assertTrue(broker.empty()) + # as well as misplaced objects in fresh db + check_object_counted(broker, broker) + check_shard_ranges_counted() + + # move to sharded state + self.assertTrue(broker.set_sharded_state()) + self.assertTrue(broker.empty()) + check_object_counted(broker, broker) + check_shard_ranges_counted() + + # own shard range still has no influence + own_sr = broker.get_own_shard_range() + own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter)) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.empty()) + + @with_tempdir + def test_empty_shard_container(self, tempdir): + # Test ContainerBroker.empty for a shard container where shard range + # usage should not be considered + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='.shards_a', container='cc') + broker.initialize(next(ts_iter).internal, 0) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertFalse(broker.is_root_container()) + + def check_object_counted(broker_to_test, broker_with_object): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker_with_object.merge_items([dict(obj)]) + self.assertFalse(broker_to_test.empty()) + # and delete it + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker_with_object.merge_items([dict(obj)]) + self.assertTrue(broker_to_test.empty()) + + self.assertTrue(broker.empty()) + check_object_counted(broker, broker) + + # own shard range is not considered for object count + own_sr = broker.get_own_shard_range() + self.assertEqual(0, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.empty()) + + broker.put_object('o', next(ts_iter).internal, 0, 'text/plain', + EMPTY_ETAG) + own_sr = broker.get_own_shard_range() + self.assertEqual(1, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertFalse(broker.empty()) + broker.delete_object('o', next(ts_iter).internal) + self.assertTrue(broker.empty()) + + def check_shard_ranges_not_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + # empty other shard ranges do not influence result + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + check_shard_ranges_not_counted() + + # move to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + + # check object in retiring db is considered + check_object_counted(broker, broker.get_brokers()[0]) + self.assertTrue(broker.empty()) + # as well as misplaced objects in fresh db + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # move to sharded state + 
self.assertTrue(broker.set_sharded_state()) + self.assertTrue(broker.empty()) + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # own shard range still has no influence + own_sr = broker.get_own_shard_range() + own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter)) + broker.merge_shard_ranges([own_sr]) self.assertTrue(broker.empty()) def test_reclaim(self): @@ -164,48 +475,120 @@ class TestContainerBroker(unittest.TestCase): broker.reclaim(Timestamp.now().internal, time()) broker.delete_db(Timestamp.now().internal) + @with_tempdir + def test_reclaim_deadlock(self, tempdir): + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(Timestamp(100).internal, 0) + # there's some magic count here that causes the failure, something + # about the size of object records and sqlite page size maybe? + count = 23000 + for i in range(count): + obj_name = 'o%d' % i + ts = Timestamp(200).internal + broker.delete_object(obj_name, ts) + broker._commit_puts() + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM object").fetchone()[0], count) + # make a broker whose container attribute is not yet set so that + # reclaim will need to query info to set it + broker = ContainerBroker(db_path, timeout=1) + # verify that reclaim doesn't get deadlocked and timeout + broker.reclaim(300, 300) + # check all objects were reclaimed + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM object" + ).fetchone()[0], 0) + + @with_tempdir + def test_reclaim_shard_ranges(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + older = next(ts_iter) + same = next(ts_iter) + newer = next(ts_iter) + shard_ranges = [ + ShardRange('.shards_a/older_deleted', older.internal, '', 'a', + deleted=True), + ShardRange('.shards_a/same_deleted', same.internal, 'a', 'b', + deleted=True), + ShardRange('.shards_a/newer_deleted', newer.internal, 'b', 'c', + deleted=True), + ShardRange('.shards_a/older', older.internal, 'c', 'd'), + ShardRange('.shards_a/same', same.internal, 'd', 'e'), + ShardRange('.shards_a/newer', newer.internal, 'e', 'f'), + # own shard range is never reclaimed, even if deleted + ShardRange('a/c', older.internal, '', '', deleted=True)] + broker.merge_shard_ranges( + random.sample(shard_ranges, len(shard_ranges))) + + def assert_row_count(expected): + with broker.get() as conn: + res = conn.execute("SELECT count(*) FROM shard_range") + self.assertEqual(expected, res.fetchone()[0]) + + broker.reclaim(older.internal, older.internal) + assert_row_count(7) + self._assert_shard_ranges(broker, shard_ranges, include_own=True) + broker.reclaim(older.internal, same.internal) + assert_row_count(6) + self._assert_shard_ranges(broker, shard_ranges[1:], include_own=True) + broker.reclaim(older.internal, newer.internal) + assert_row_count(5) + self._assert_shard_ranges(broker, shard_ranges[2:], include_own=True) + broker.reclaim(older.internal, next(ts_iter).internal) + assert_row_count(4) + self._assert_shard_ranges(broker, shard_ranges[3:], include_own=True) + def test_get_info_is_deleted(self): - start = int(time()) - ts = (Timestamp(t).internal for t in itertools.count(start)) + ts = make_timestamp_iter() + start = next(ts) broker = 
ContainerBroker(':memory:', account='test_account', container='test_container') # create it - broker.initialize(next(ts), POLICIES.default.idx) + broker.initialize(start.internal, POLICIES.default.idx) info, is_deleted = broker.get_info_is_deleted() self.assertEqual(is_deleted, broker.is_deleted()) self.assertEqual(is_deleted, False) # sanity self.assertEqual(info, broker.get_info()) - self.assertEqual(info['put_timestamp'], Timestamp(start).internal) + self.assertEqual(info['put_timestamp'], start.internal) self.assertTrue(Timestamp(info['created_at']) >= start) self.assertEqual(info['delete_timestamp'], '0') if self.__class__ in (TestContainerBrokerBeforeMetadata, TestContainerBrokerBeforeXSync, - TestContainerBrokerBeforeSPI): + TestContainerBrokerBeforeSPI, + TestContainerBrokerBeforeShardRanges): self.assertEqual(info['status_changed_at'], '0') else: self.assertEqual(info['status_changed_at'], - Timestamp(start).internal) + start.internal) # delete it delete_timestamp = next(ts) - broker.delete_db(delete_timestamp) + broker.delete_db(delete_timestamp.internal) info, is_deleted = broker.get_info_is_deleted() self.assertEqual(is_deleted, True) # sanity self.assertEqual(is_deleted, broker.is_deleted()) self.assertEqual(info, broker.get_info()) - self.assertEqual(info['put_timestamp'], Timestamp(start).internal) + self.assertEqual(info['put_timestamp'], start.internal) self.assertTrue(Timestamp(info['created_at']) >= start) self.assertEqual(info['delete_timestamp'], delete_timestamp) self.assertEqual(info['status_changed_at'], delete_timestamp) # bring back to life - broker.put_object('obj', next(ts), 0, 'text/plain', 'etag', + broker.put_object('obj', next(ts).internal, 0, 'text/plain', 'etag', storage_policy_index=broker.storage_policy_index) info, is_deleted = broker.get_info_is_deleted() self.assertEqual(is_deleted, False) # sanity self.assertEqual(is_deleted, broker.is_deleted()) self.assertEqual(info, broker.get_info()) - self.assertEqual(info['put_timestamp'], Timestamp(start).internal) + self.assertEqual(info['put_timestamp'], start.internal) self.assertTrue(Timestamp(info['created_at']) >= start) self.assertEqual(info['delete_timestamp'], delete_timestamp) self.assertEqual(info['status_changed_at'], delete_timestamp) @@ -432,6 +815,273 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(conn.execute( "SELECT deleted FROM object").fetchone()[0], 0) + def test_merge_shard_range_single_record(self): + # Test ContainerBroker.merge_shard_range + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + + ts_iter = make_timestamp_iter() + # Stash these for later + old_put_timestamp = next(ts_iter).internal + old_delete_timestamp = next(ts_iter).internal + + # Create initial object + timestamp = next(ts_iter).internal + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'low', 'up', meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'low') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'up') + self.assertEqual(conn.execute( + "SELECT 
deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 0) + + # Reput same event + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'low', 'up', meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'low') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'up') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 0) + + # Put new event + timestamp = next(ts_iter).internal + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lower', 'upper', 1, 2, meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lower') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upper') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 2) + + # Put old event + broker.merge_shard_ranges( + ShardRange('"a/{}"', old_put_timestamp, + 'lower', 'upper', 1, 2, meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) # Not old_put_timestamp! + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lower') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upper') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 2) + + # Put old delete event + broker.merge_shard_ranges( + ShardRange('"a/{}"', old_delete_timestamp, + 'lower', 'upper', meta_timestamp=meta_timestamp, + deleted=1)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) # Not old_delete_timestamp! 
+ self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lower') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upper') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 2) + + # Put new delete event + timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lower', 'upper', meta_timestamp=meta_timestamp, + deleted=1)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 1) + + # Put new event + timestamp = next(ts_iter).internal + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lowerer', 'upperer', 3, 4, + meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lowerer') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upperer') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 3) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 4) + + # We'll use this later + in_between_timestamp = next(ts_iter).internal + + # New update event, meta_timestamp increases + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lowerer', 'upperer', 3, 4, + meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lowerer') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upperer') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 3) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 4) + + # Put event from after last put but before last post + timestamp = in_between_timestamp + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lowererer', 'uppererer', 5, 6, + meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + 
"SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lowererer') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'uppererer') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 5) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 6) + + def test_merge_shard_ranges_deleted(self): + # Test ContainerBroker.merge_shard_ranges sets deleted attribute + ts_iter = make_timestamp_iter() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + # put shard range + broker.merge_shard_ranges(ShardRange('a/o', next(ts_iter).internal)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 0").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 1").fetchone()[0], 0) + + # delete shard range + broker.merge_shard_ranges(ShardRange('a/o', next(ts_iter).internal, + deleted=1)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 0").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 1").fetchone()[0], 1) + def test_make_tuple_for_pickle(self): record = {'name': 'obj', 'created_at': '1234567890.12345', @@ -559,7 +1209,7 @@ class TestContainerBroker(unittest.TestCase): "SELECT deleted FROM object").fetchone()[0], deleted) def _test_put_object_multiple_encoded_timestamps(self, broker): - ts = (Timestamp(t) for t in itertools.count(int(time()))) + ts = make_timestamp_iter() broker.initialize(next(ts).internal, 0) t = [next(ts) for _ in range(9)] @@ -619,6 +1269,194 @@ class TestContainerBroker(unittest.TestCase): broker = ContainerBroker(':memory:', account='a', container='c') self._test_put_object_multiple_encoded_timestamps(broker) + @with_tempdir + def test_get_db_state(self, tempdir): + acct = 'account' + cont = 'container' + hsh = hash_path(acct, cont) + db_file = "%s.db" % hsh + epoch = Timestamp.now() + fresh_db_file = "%s_%s.db" % (hsh, epoch.normal) + db_path = os.path.join(tempdir, db_file) + fresh_db_path = os.path.join(tempdir, fresh_db_file) + ts = Timestamp.now() + + # First test NOTFOUND state + broker = ContainerBroker(db_path, account=acct, container=cont) + self.assertEqual(broker.get_db_state(), 'not_found') + + # Test UNSHARDED state, that is when db_file exists and fresh_db_file + # doesn't + broker.initialize(ts.internal, 0) + self.assertEqual(broker.get_db_state(), 'unsharded') + + # Test the SHARDING state, this is the period when both the db_file and + # the fresh_db_file exist + fresh_broker = ContainerBroker(fresh_db_path, account=acct, + container=cont, force_db_file=True) + fresh_broker.initialize(ts.internal, 0) + own_shard_range = fresh_broker.get_own_shard_range() + own_shard_range.update_state(ShardRange.SHARDING) + own_shard_range.epoch = epoch + shard_range = ShardRange( + '.shards_%s/%s' % (acct, cont), Timestamp.now()) + fresh_broker.merge_shard_ranges([own_shard_range, shard_range]) + + self.assertEqual(fresh_broker.get_db_state(), 'sharding') + # old broker will also change 
state if we reload its db files + broker.reload_db_files() + self.assertEqual(broker.get_db_state(), 'sharding') + + # Test the SHARDED state, this is when only fresh_db_file exists. + os.unlink(db_path) + fresh_broker.reload_db_files() + self.assertEqual(fresh_broker.get_db_state(), 'sharded') + + # Test the COLLAPSED state, this is when only fresh_db_file exists. + shard_range.deleted = 1 + shard_range.timestamp = Timestamp.now() + fresh_broker.merge_shard_ranges([shard_range]) + self.assertEqual(fresh_broker.get_db_state(), 'collapsed') + + # back to UNSHARDED if the desired epoch changes + own_shard_range.update_state(ShardRange.SHRINKING, + state_timestamp=Timestamp.now()) + own_shard_range.epoch = Timestamp.now() + fresh_broker.merge_shard_ranges([own_shard_range]) + self.assertEqual(fresh_broker.get_db_state(), 'unsharded') + + @with_tempdir + def test_db_file(self, tempdir): + acct = 'account' + cont = 'continer' + hsh = hash_path(acct, cont) + db_file = "%s.db" % hsh + ts_epoch = Timestamp.now() + fresh_db_file = "%s_%s.db" % (hsh, ts_epoch.normal) + db_path = os.path.join(tempdir, db_file) + fresh_db_path = os.path.join(tempdir, fresh_db_file) + ts = Timestamp.now() + + # First test NOTFOUND state, this will return the db_file passed + # in the constructor + def check_unfound_db_files(broker, init_db_file): + self.assertEqual(init_db_file, broker.db_file) + self.assertEqual(broker._db_file, db_path) + self.assertFalse(os.path.exists(db_path)) + self.assertFalse(os.path.exists(fresh_db_path)) + self.assertEqual([], broker.db_files) + + broker = ContainerBroker(db_path, account=acct, container=cont) + check_unfound_db_files(broker, db_path) + broker = ContainerBroker(fresh_db_path, account=acct, container=cont) + check_unfound_db_files(broker, fresh_db_path) + + # Test UNSHARDED state, that is when db_file exists and fresh_db_file + # doesn't, so it should return the db_path + def check_unsharded_db_files(broker): + self.assertEqual(broker.db_file, db_path) + self.assertEqual(broker._db_file, db_path) + self.assertTrue(os.path.exists(db_path)) + self.assertFalse(os.path.exists(fresh_db_path)) + self.assertEqual([db_path], broker.db_files) + + broker = ContainerBroker(db_path, account=acct, container=cont) + broker.initialize(ts.internal, 0) + check_unsharded_db_files(broker) + broker = ContainerBroker(fresh_db_path, account=acct, container=cont) + check_unsharded_db_files(broker) + # while UNSHARDED db_path is still used despite giving fresh_db_path + # to init, so we cannot initialize this broker + with self.assertRaises(DatabaseAlreadyExists): + broker.initialize(ts.internal, 0) + + # Test the SHARDING state, this is the period when both the db_file and + # the fresh_db_file exist, in this case it should return the + # fresh_db_path. 
+ def check_sharding_db_files(broker): + self.assertEqual(broker.db_file, fresh_db_path) + self.assertEqual(broker._db_file, db_path) + self.assertTrue(os.path.exists(db_path)) + self.assertTrue(os.path.exists(fresh_db_path)) + self.assertEqual([db_path, fresh_db_path], broker.db_files) + + # Use force_db_file to have db_shard_path created when initializing + broker = ContainerBroker(fresh_db_path, account=acct, + container=cont, force_db_file=True) + self.assertEqual([db_path], broker.db_files) + broker.initialize(ts.internal, 0) + check_sharding_db_files(broker) + broker = ContainerBroker(db_path, account=acct, container=cont) + check_sharding_db_files(broker) + broker = ContainerBroker(fresh_db_path, account=acct, container=cont) + check_sharding_db_files(broker) + + # force_db_file can be used to open db_path specifically + forced_broker = ContainerBroker(db_path, account=acct, + container=cont, force_db_file=True) + self.assertEqual(forced_broker.db_file, db_path) + self.assertEqual(forced_broker._db_file, db_path) + + def check_sharded_db_files(broker): + self.assertEqual(broker.db_file, fresh_db_path) + self.assertEqual(broker._db_file, db_path) + self.assertFalse(os.path.exists(db_path)) + self.assertTrue(os.path.exists(fresh_db_path)) + self.assertEqual([fresh_db_path], broker.db_files) + + # Test the SHARDED state, this is when only fresh_db_file exists, so + # obviously this should return the fresh_db_path + os.unlink(db_path) + broker.reload_db_files() + check_sharded_db_files(broker) + broker = ContainerBroker(db_path, account=acct, container=cont) + check_sharded_db_files(broker) + + @with_tempdir + def test_sharding_initiated_and_required(self, tempdir): + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(Timestamp.now().internal, 0) + # no shard ranges + self.assertIs(False, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + # only own shard range + own_sr = broker.get_own_shard_range() + for state in ShardRange.STATES: + own_sr.update_state(state, state_timestamp=Timestamp.now()) + broker.merge_shard_ranges(own_sr) + self.assertIs(False, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + + # shard ranges, still ACTIVE + own_sr.update_state(ShardRange.ACTIVE, + state_timestamp=Timestamp.now()) + broker.merge_shard_ranges(own_sr) + broker.merge_shard_ranges(ShardRange('.shards_a/cc', Timestamp.now())) + self.assertIs(False, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + + # shard ranges and SHARDING, SHRINKING or SHARDED + broker.enable_sharding(Timestamp.now()) + self.assertTrue(broker.set_sharding_state()) + self.assertIs(True, broker.sharding_initiated()) + self.assertIs(True, broker.sharding_required()) + + epoch = broker.db_epoch + own_sr.update_state(ShardRange.SHRINKING, + state_timestamp=Timestamp.now()) + own_sr.epoch = epoch + broker.merge_shard_ranges(own_sr) + self.assertIs(True, broker.sharding_initiated()) + self.assertIs(True, broker.sharding_required()) + + own_sr.update_state(ShardRange.SHARDED) + broker.merge_shard_ranges(own_sr) + self.assertTrue(broker.set_sharded_state()) + self.assertIs(True, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + @with_tempdir def test_put_object_multiple_encoded_timestamps_using_file(self, tempdir): # Test ContainerBroker.put_object with differing data, content-type @@ -629,7 +1467,7 
@@ class TestContainerBroker(unittest.TestCase): self._test_put_object_multiple_encoded_timestamps(broker) def _test_put_object_multiple_explicit_timestamps(self, broker): - ts = (Timestamp(t) for t in itertools.count(int(time()))) + ts = make_timestamp_iter() broker.initialize(next(ts).internal, 0) t = [next(ts) for _ in range(11)] @@ -733,7 +1571,7 @@ class TestContainerBroker(unittest.TestCase): def test_last_modified_time(self): # Test container listing reports the most recent of data or metadata # timestamp as last-modified time - ts = (Timestamp(t) for t in itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') broker.initialize(next(ts).internal, 0) @@ -786,18 +1624,17 @@ class TestContainerBroker(unittest.TestCase): @patch_policies def test_put_misplaced_object_does_not_effect_container_stats(self): policy = random.choice(list(POLICIES)) - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) # migration tests may not honor policy on initialize if isinstance(self, ContainerBrokerMigrationMixin): real_storage_policy_index = \ broker.get_info()['storage_policy_index'] policy = [p for p in POLICIES if p.idx == real_storage_policy_index][0] - broker.put_object('correct_o', next(ts), 123, 'text/plain', + broker.put_object('correct_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy.idx) info = broker.get_info() @@ -805,7 +1642,7 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(123, info['bytes_used']) other_policy = random.choice([p for p in POLICIES if p is not policy]) - broker.put_object('wrong_o', next(ts), 123, 'text/plain', + broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=other_policy.idx) self.assertEqual(1, info['object_count']) @@ -814,23 +1651,22 @@ class TestContainerBroker(unittest.TestCase): @patch_policies def test_has_multiple_policies(self): policy = random.choice(list(POLICIES)) - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) # migration tests may not honor policy on initialize if isinstance(self, ContainerBrokerMigrationMixin): real_storage_policy_index = \ broker.get_info()['storage_policy_index'] policy = [p for p in POLICIES if p.idx == real_storage_policy_index][0] - broker.put_object('correct_o', next(ts), 123, 'text/plain', + broker.put_object('correct_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy.idx) self.assertFalse(broker.has_multiple_policies()) other_policy = [p for p in POLICIES if p is not policy][0] - broker.put_object('wrong_o', next(ts), 123, 'text/plain', + broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=other_policy.idx) self.assertTrue(broker.has_multiple_policies()) @@ -838,11 +1674,10 @@ class TestContainerBroker(unittest.TestCase): @patch_policies def test_get_policy_info(self): policy = random.choice(list(POLICIES)) - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = 
make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) # migration tests may not honor policy on initialize if isinstance(self, ContainerBrokerMigrationMixin): real_storage_policy_index = \ @@ -854,7 +1689,7 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(policy_stats, expected) # add an object - broker.put_object('correct_o', next(ts), 123, 'text/plain', + broker.put_object('correct_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy.idx) policy_stats = broker.get_policy_stats() @@ -864,7 +1699,7 @@ class TestContainerBroker(unittest.TestCase): # add a misplaced object other_policy = random.choice([p for p in POLICIES if p is not policy]) - broker.put_object('wrong_o', next(ts), 123, 'text/plain', + broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=other_policy.idx) policy_stats = broker.get_policy_stats() @@ -876,15 +1711,14 @@ class TestContainerBroker(unittest.TestCase): @patch_policies def test_policy_stat_tracking(self): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') # Note: in subclasses of this TestCase that inherit the # ContainerBrokerMigrationMixin, passing POLICIES.default.idx here has # no effect and broker.get_policy_stats() returns a dict with a single # entry mapping policy index 0 to the container stats - broker.initialize(next(ts), POLICIES.default.idx) + broker.initialize(next(ts).internal, POLICIES.default.idx) stats = defaultdict(dict) def assert_empty_default_policy_stats(policy_stats): @@ -904,7 +1738,7 @@ class TestContainerBroker(unittest.TestCase): policy_index = random.randint(0, iters * 0.1) name = 'object-%s' % random.randint(0, iters * 0.1) size = random.randint(0, iters) - broker.put_object(name, next(ts), size, 'text/plain', + broker.put_object(name, next(ts).internal, size, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy_index) # track the size of the latest timestamp put for each object @@ -973,7 +1807,8 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(info['delete_timestamp'], '0') if self.__class__ in (TestContainerBrokerBeforeMetadata, TestContainerBrokerBeforeXSync, - TestContainerBrokerBeforeSPI): + TestContainerBrokerBeforeSPI, + TestContainerBrokerBeforeShardRanges): self.assertEqual(info['status_changed_at'], '0') else: self.assertEqual(info['status_changed_at'], @@ -1019,6 +1854,84 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(info['x_container_sync_point1'], -1) self.assertEqual(info['x_container_sync_point2'], -1) + @with_tempdir + def test_get_info_sharding_states(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'part', 'suffix', 'hash', 'hash.db') + broker = ContainerBroker( + db_path, account='myaccount', container='mycontainer') + broker.initialize(next(ts_iter).internal, 0) + broker.put_object('o1', next(ts_iter).internal, 123, 'text/plain', + 'fake etag') + sr = ShardRange('.shards_a/c', next(ts_iter)) + broker.merge_shard_ranges(sr) + + def check_info(expected): + errors = [] + for k, v in expected.items(): + if info.get(k) != v: + errors.append((k, v, info.get(k))) + if errors: + self.fail('Mismatches: %s' % ', '.join( + ['%s should be %s but got %s' % 
error + for error in errors])) + + # unsharded + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + info = broker.get_info() + mock_get_shard_usage.assert_not_called() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 1, + 'bytes_used': 123, + 'db_state': 'unsharded'}) + + # sharding + epoch = next(ts_iter) + broker.enable_sharding(epoch) + self.assertTrue(broker.set_sharding_state()) + broker.put_object('o2', next(ts_iter).internal, 1, 'text/plain', + 'fake etag') + broker.put_object('o3', next(ts_iter).internal, 320, 'text/plain', + 'fake etag') + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + info = broker.get_info() + mock_get_shard_usage.assert_not_called() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 1, + 'bytes_used': 123, + 'db_state': 'sharding'}) + + # sharded + self.assertTrue(broker.set_sharded_state()) + shard_stats = {'object_count': 1001, 'bytes_used': 3003} + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + mock_get_shard_usage.return_value = shard_stats + info = broker.get_info() + mock_get_shard_usage.assert_called_once_with() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 1001, + 'bytes_used': 3003, + 'db_state': 'sharded'}) + + # collapsed + sr.set_deleted(next(ts_iter)) + broker.merge_shard_ranges(sr) + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + info = broker.get_info() + mock_get_shard_usage.assert_not_called() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 2, + 'bytes_used': 321, + 'db_state': 'collapsed'}) + def test_set_x_syncs(self): broker = ContainerBroker(':memory:', account='test1', container='test2') @@ -1100,6 +2013,174 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(info['reported_object_count'], 2) self.assertEqual(info['reported_bytes_used'], 1123) + @with_tempdir + def test_remove_objects(self, tempdir): + objects = (('undeleted', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG, 0, 0), + ('other_policy', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG, 0, 1), + ('deleted', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG, 1, 0)) + object_names = [o[0] for o in objects] + + def get_rows(broker): + with broker.get() as conn: + cursor = conn.execute("SELECT * FROM object") + return [r[1] for r in cursor] + + def do_setup(): + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(Timestamp.now().internal, 0) + for obj in objects: + # ensure row order matches put order + broker.put_object(*obj) + broker._commit_puts() + + self.assertEqual(3, broker.get_max_row()) # sanity check + self.assertEqual(object_names, get_rows(broker)) # sanity check + return broker + + broker = do_setup() + broker.remove_objects('', '') + self.assertFalse(get_rows(broker)) + + broker = do_setup() + broker.remove_objects('deleted', '') + self.assertEqual([object_names[2]], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', 'deleted', max_row=2) + self.assertEqual(object_names, get_rows(broker)) + + broker = do_setup() + broker.remove_objects('deleted', 'un') + self.assertEqual([object_names[0], object_names[2]], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=-1) + self.assertEqual(object_names, 
get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=0) + self.assertEqual(object_names, get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=1) + self.assertEqual(object_names[1:], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=2) + self.assertEqual(object_names[2:], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=3) + self.assertFalse(get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=99) + self.assertFalse(get_rows(broker)) + + def test_get_objects(self): + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + ts_iter = make_timestamp_iter() + objects_0 = [{'name': 'obj_0_%d' % i, + 'created_at': next(ts_iter).normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': 0 + } for i in range(1, 8)] + objects_1 = [{'name': 'obj_1_%d' % i, + 'created_at': next(ts_iter).normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': 1 + } for i in range(1, 8)] + # merge_objects mutates items + broker.merge_items([dict(obj) for obj in objects_0 + objects_1]) + + actual = broker.get_objects() + self.assertEqual(objects_0 + objects_1, actual) + + with mock.patch('swift.container.backend.CONTAINER_LISTING_LIMIT', 2): + actual = broker.get_objects() + self.assertEqual(objects_0[:2], actual) + + with mock.patch('swift.container.backend.CONTAINER_LISTING_LIMIT', 2): + actual = broker.get_objects(limit=9) + self.assertEqual(objects_0 + objects_1[:2], actual) + + actual = broker.get_objects(marker=objects_0[2]['name']) + self.assertEqual(objects_0[3:] + objects_1, actual) + + actual = broker.get_objects(end_marker=objects_0[2]['name']) + self.assertEqual(objects_0[:2], actual) + + actual = broker.get_objects(include_deleted=True) + self.assertEqual(objects_0[::2] + objects_1[::2], actual) + + actual = broker.get_objects(include_deleted=False) + self.assertEqual(objects_0[1::2] + objects_1[1::2], actual) + + actual = broker.get_objects(include_deleted=None) + self.assertEqual(objects_0 + objects_1, actual) + + def test_get_objects_since_row(self): + ts_iter = make_timestamp_iter() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + obj_names = ['obj%03d' % i for i in range(20)] + timestamps = [next(ts_iter) for o in obj_names] + for name, timestamp in zip(obj_names, timestamps): + broker.put_object(name, timestamp.internal, + 0, 'text/plain', EMPTY_ETAG) + broker._commit_puts() # ensure predictable row order + timestamps = [next(ts_iter) for o in obj_names[10:]] + for name, timestamp in zip(obj_names[10:], timestamps): + broker.put_object(name, timestamp.internal, + 0, 'text/plain', EMPTY_ETAG, deleted=1) + broker._commit_puts() # ensure predictable row order + + # sanity check + self.assertEqual(30, broker.get_max_row()) + actual = broker.get_objects() + self.assertEqual(obj_names, [o['name'] for o in actual]) + + # all rows included + actual = broker.get_objects(since_row=None) + self.assertEqual(obj_names, [o['name'] for o in actual]) + + actual = broker.get_objects(since_row=-1) + self.assertEqual(obj_names, [o['name'] for o in actual]) + + # selected rows + for since_row in range(10): + actual = broker.get_objects(since_row=since_row) + with annotate_failure(since_row): + 
self.assertEqual(obj_names[since_row:], + [o['name'] for o in actual]) + + for since_row in range(10, 20): + actual = broker.get_objects(since_row=since_row) + with annotate_failure(since_row): + self.assertEqual(obj_names[10:], + [o['name'] for o in actual]) + + for since_row in range(20, len(obj_names) + 1): + actual = broker.get_objects(since_row=since_row) + with annotate_failure(since_row): + self.assertEqual(obj_names[since_row - 10:], + [o['name'] for o in actual]) + + self.assertFalse(broker.get_objects(end_marker=obj_names[5], + since_row=5)) + def test_list_objects_iter(self): # Test ContainerBroker.list_objects_iter broker = ContainerBroker(':memory:', account='a', container='c') @@ -1832,6 +2913,21 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(['a', 'b', 'c'], sorted([rec['name'] for rec in items])) + @with_tempdir + def test_merge_items_is_green(self, tempdir): + ts = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts).internal, 1) + + broker.put_object('b', next(ts).internal, 0, 'text/plain', + EMPTY_ETAG) + + with mock.patch('swift.container.backend.tpool') as mock_tpool: + broker.get_info() + mock_tpool.execute.assert_called_once() + def test_merge_items_overwrite_unicode(self): # test DatabaseBroker.merge_items snowman = u'\N{SNOWMAN}'.encode('utf-8') @@ -1930,12 +3026,11 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(rec['content_type'], 'text/plain') def test_set_storage_policy_index(self): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='test_account', container='test_container') timestamp = next(ts) - broker.initialize(timestamp, 0) + broker.initialize(timestamp.internal, 0) info = broker.get_info() self.assertEqual(0, info['storage_policy_index']) # sanity check @@ -1943,42 +3038,44 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(0, info['bytes_used']) if self.__class__ in (TestContainerBrokerBeforeMetadata, TestContainerBrokerBeforeXSync, - TestContainerBrokerBeforeSPI): + TestContainerBrokerBeforeSPI, + TestContainerBrokerBeforeShardRanges): self.assertEqual(info['status_changed_at'], '0') else: - self.assertEqual(timestamp, info['status_changed_at']) + self.assertEqual(timestamp.internal, info['status_changed_at']) expected = {0: {'object_count': 0, 'bytes_used': 0}} self.assertEqual(expected, broker.get_policy_stats()) timestamp = next(ts) - broker.set_storage_policy_index(111, timestamp) + broker.set_storage_policy_index(111, timestamp.internal) self.assertEqual(broker.storage_policy_index, 111) info = broker.get_info() self.assertEqual(111, info['storage_policy_index']) self.assertEqual(0, info['object_count']) self.assertEqual(0, info['bytes_used']) - self.assertEqual(timestamp, info['status_changed_at']) + self.assertEqual(timestamp.internal, info['status_changed_at']) expected[111] = {'object_count': 0, 'bytes_used': 0} self.assertEqual(expected, broker.get_policy_stats()) timestamp = next(ts) - broker.set_storage_policy_index(222, timestamp) + broker.set_storage_policy_index(222, timestamp.internal) self.assertEqual(broker.storage_policy_index, 222) info = broker.get_info() self.assertEqual(222, info['storage_policy_index']) self.assertEqual(0, info['object_count']) self.assertEqual(0, info['bytes_used']) - self.assertEqual(timestamp, info['status_changed_at']) + 
self.assertEqual(timestamp.internal, info['status_changed_at']) expected[222] = {'object_count': 0, 'bytes_used': 0} self.assertEqual(expected, broker.get_policy_stats()) old_timestamp, timestamp = timestamp, next(ts) - broker.set_storage_policy_index(222, timestamp) # it's idempotent + # setting again is idempotent + broker.set_storage_policy_index(222, timestamp.internal) info = broker.get_info() self.assertEqual(222, info['storage_policy_index']) self.assertEqual(0, info['object_count']) self.assertEqual(0, info['bytes_used']) - self.assertEqual(old_timestamp, info['status_changed_at']) + self.assertEqual(old_timestamp.internal, info['status_changed_at']) self.assertEqual(expected, broker.get_policy_stats()) def test_set_storage_policy_index_empty(self): @@ -2004,19 +3101,18 @@ class TestContainerBroker(unittest.TestCase): @with_tempdir def test_legacy_pending_files(self, tempdir): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() db_path = os.path.join(tempdir, 'container.db') # first init an acct DB without the policy_stat table present broker = ContainerBroker(db_path, account='a', container='c') - broker.initialize(next(ts), 1) + broker.initialize(next(ts).internal, 1) # manually make some pending entries lacking storage_policy_index with open(broker.pending_file, 'a+b') as fp: for i in range(10): name, timestamp, size, content_type, etag, deleted = ( - 'o%s' % i, next(ts), 0, 'c', 'e', 0) + 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0) fp.write(':') fp.write(pickle.dumps( (name, timestamp, size, content_type, etag, deleted), @@ -2033,7 +3129,7 @@ class TestContainerBroker(unittest.TestCase): else: size = 2 storage_policy_index = 1 - broker.put_object(name, next(ts), size, 'c', 'e', 0, + broker.put_object(name, next(ts).internal, size, 'c', 'e', 0, storage_policy_index=storage_policy_index) broker._commit_puts_stale_ok() @@ -2049,8 +3145,7 @@ class TestContainerBroker(unittest.TestCase): @with_tempdir def test_get_info_no_stale_reads(self, tempdir): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() db_path = os.path.join(tempdir, 'container.db') def mock_commit_puts(): @@ -2058,13 +3153,13 @@ class TestContainerBroker(unittest.TestCase): broker = ContainerBroker(db_path, account='a', container='c', stale_reads_ok=False) - broker.initialize(next(ts), 1) + broker.initialize(next(ts).internal, 1) # manually make some pending entries with open(broker.pending_file, 'a+b') as fp: for i in range(10): name, timestamp, size, content_type, etag, deleted = ( - 'o%s' % i, next(ts), 0, 'c', 'e', 0) + 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0) fp.write(':') fp.write(pickle.dumps( (name, timestamp, size, content_type, etag, deleted), @@ -2079,8 +3174,7 @@ class TestContainerBroker(unittest.TestCase): @with_tempdir def test_get_info_stale_read_ok(self, tempdir): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() db_path = os.path.join(tempdir, 'container.db') def mock_commit_puts(): @@ -2088,13 +3182,13 @@ class TestContainerBroker(unittest.TestCase): broker = ContainerBroker(db_path, account='a', container='c', stale_reads_ok=True) - broker.initialize(next(ts), 1) + broker.initialize(next(ts).internal, 1) # manually make some pending entries with open(broker.pending_file, 'a+b') as fp: for i in range(10): name, timestamp, size, content_type, etag, deleted = ( - 'o%s' % i, next(ts), 0, 'c', 'e', 0) + 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0) 
fp.write(':') fp.write(pickle.dumps( (name, timestamp, size, content_type, etag, deleted), @@ -2104,6 +3198,1257 @@ class TestContainerBroker(unittest.TestCase): broker._commit_puts = mock_commit_puts broker.get_info() + @with_tempdir + def test_create_broker(self, tempdir): + broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c') + hsh = hash_path('a', 'c') + expected_path = os.path.join( + tempdir, 'containers', '0', hsh[-3:], hsh, hsh + '.db') + self.assertEqual(expected_path, broker.db_file) + self.assertTrue(os.path.isfile(expected_path)) + + ts = Timestamp.now() + broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c1', + put_timestamp=ts.internal) + hsh = hash_path('a', 'c1') + expected_path = os.path.join( + tempdir, 'containers', '0', hsh[-3:], hsh, hsh + '.db') + self.assertEqual(expected_path, broker.db_file) + self.assertTrue(os.path.isfile(expected_path)) + self.assertEqual(ts.internal, broker.get_info()['put_timestamp']) + self.assertEqual(0, broker.get_info()['storage_policy_index']) + + epoch = Timestamp.now() + broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c3', + epoch=epoch) + hsh = hash_path('a', 'c3') + expected_path = os.path.join( + tempdir, 'containers', '0', hsh[-3:], + hsh, '%s_%s.db' % (hsh, epoch.internal)) + self.assertEqual(expected_path, broker.db_file) + + @with_tempdir + def test_pending_file_name(self, tempdir): + # pending file should have same name for sharded or unsharded db + expected_pending_path = os.path.join(tempdir, 'container.db.pending') + + db_path = os.path.join(tempdir, 'container.db') + fresh_db_path = os.path.join(tempdir, 'container_epoch.db') + + def do_test(given_db_file, expected_db_file): + broker = ContainerBroker(given_db_file, account='a', container='c') + self.assertEqual(expected_pending_path, broker.pending_file) + self.assertEqual(expected_db_file, broker.db_file) + + # no files exist + do_test(db_path, db_path) + do_test(fresh_db_path, fresh_db_path) + + # only container.db exists - unsharded + with open(db_path, 'wb'): + pass + do_test(db_path, db_path) + do_test(fresh_db_path, db_path) + + # container.db and container_shard.db exist - sharding + with open(fresh_db_path, 'wb'): + pass + do_test(db_path, fresh_db_path) + do_test(fresh_db_path, fresh_db_path) + + # only container_shard.db exists - sharded + os.unlink(db_path) + do_test(db_path, fresh_db_path) + do_test(fresh_db_path, fresh_db_path) + + @with_tempdir + def test_sharding_sysmeta(self, tempdir): + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='myaccount', container='mycontainer') + broker.initialize(Timestamp.now().internal) + + expected = 'aaa/ccc' + with mock_timestamp_now() as now: + broker.set_sharding_sysmeta('Root', expected) + actual = broker.metadata + self.assertEqual([expected, now.internal], + actual.get('X-Container-Sysmeta-Shard-Root')) + self.assertEqual(expected, broker.get_sharding_sysmeta('Root')) + + expected = {'key': 'value'} + with mock_timestamp_now() as now: + broker.set_sharding_sysmeta('test', expected) + actual = broker.metadata + self.assertEqual([expected, now.internal], + actual.get('X-Container-Sysmeta-Shard-test')) + self.assertEqual(expected, broker.get_sharding_sysmeta('test')) + + @with_tempdir + def test_path(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='myaccount', container='mycontainer') + broker.initialize(next(ts_iter).internal, 1) + # make sure we can cope 
with unitialized account and container + broker.account = broker.container = None + self.assertEqual('myaccount/mycontainer', broker.path) + + @with_tempdir + def test_root_account_container_path(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='root_a', container='root_c') + broker.initialize(next(ts_iter).internal, 1) + # make sure we can cope with unitialized account and container + broker.account = broker.container = None + + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + self.assertEqual('root_a', broker.account) # sanity check + self.assertEqual('root_c', broker.container) # sanity check + + # we don't expect root containers to have this sysmeta set but if it is + # the broker should still behave like a root container + metadata = { + 'X-Container-Sysmeta-Shard-Root': + ('root_a/root_c', next(ts_iter).internal)} + broker = ContainerBroker( + db_path, account='root_a', container='root_c') + broker.update_metadata(metadata) + broker.account = broker.container = None + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + + # if root is marked deleted, it still considers itself to be a root + broker.delete_db(next(ts_iter).internal) + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + # check the values are not just being cached + broker = ContainerBroker(db_path) + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + + # check a shard container + db_path = os.path.join(tempdir, 'shard_container.db') + broker = ContainerBroker( + db_path, account='.shards_root_a', container='c_shard') + broker.initialize(next(ts_iter).internal, 1) + # now the metadata is significant... 
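# --- Illustrative sketch, not part of the patch: the 'account/container' form
# of the X-Container-Sysmeta-Shard-Root value that the root/shard resolution
# and the check_validation cases below depend on. parse_shard_root is a
# hypothetical helper shown only to illustrate the accepted format; it is not
# the broker's actual parsing code.
def parse_shard_root(value):
    # Exactly two non-empty segments and no leading slash; anything else is
    # rejected (compare the ValueError cases exercised by check_validation).
    segments = value.split('/')
    if len(segments) != 2 or not all(segments):
        raise ValueError(
            'Expected X-Container-Sysmeta-Shard-Root to be of the form '
            '"account/container", got %r' % value)
    return segments[0], segments[1]

# e.g. a shard '.shards_root_a/c_shard' records 'root_a/root_c' as its root,
# so root_account is 'root_a', root_container is 'root_c', and
# is_root_container() is False because its own path differs from the root's.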
+ metadata = { + 'X-Container-Sysmeta-Shard-Root': + ('root_a/root_c', next(ts_iter).internal)} + broker.update_metadata(metadata) + broker.account = broker.container = None + broker._root_account = broker._root_container = None + + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertFalse(broker.is_root_container()) + + # check validation + def check_validation(root_value): + metadata = { + 'X-Container-Sysmeta-Shard-Root': + (root_value, next(ts_iter).internal)} + broker.update_metadata(metadata) + broker.account = broker.container = None + broker._root_account = broker._root_container = None + with self.assertRaises(ValueError) as cm: + broker.root_account + self.assertIn('Expected X-Container-Sysmeta-Shard-Root', + str(cm.exception)) + with self.assertRaises(ValueError): + broker.root_container + + check_validation('root_a') + check_validation('/root_a') + check_validation('/root_a/root_c') + check_validation('/root_a/root_c/blah') + check_validation('/') + + def test_resolve_shard_range_states(self): + self.assertIsNone(ContainerBroker.resolve_shard_range_states(None)) + self.assertIsNone(ContainerBroker.resolve_shard_range_states([])) + + for state_num, state_name in ShardRange.STATES.items(): + self.assertEqual({state_num}, + ContainerBroker.resolve_shard_range_states( + [state_name])) + self.assertEqual({state_num}, + ContainerBroker.resolve_shard_range_states( + [state_num])) + + self.assertEqual(set(ShardRange.STATES), + ContainerBroker.resolve_shard_range_states( + ShardRange.STATES_BY_NAME)) + + self.assertEqual( + set(ShardRange.STATES), + ContainerBroker.resolve_shard_range_states(ShardRange.STATES)) + + # check aliases + self.assertEqual( + {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING}, + ContainerBroker.resolve_shard_range_states(['listing'])) + + self.assertEqual( + {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING}, + ContainerBroker.resolve_shard_range_states(['listing', 'active'])) + + self.assertEqual( + {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING, ShardRange.CREATED}, + ContainerBroker.resolve_shard_range_states(['listing', 'created'])) + + self.assertEqual( + {ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING}, + ContainerBroker.resolve_shard_range_states(['updating'])) + + self.assertEqual( + {ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING, ShardRange.SHRINKING}, + ContainerBroker.resolve_shard_range_states( + ['updating', 'listing'])) + + def check_bad_value(value): + with self.assertRaises(ValueError) as cm: + ContainerBroker.resolve_shard_range_states(value) + self.assertIn('Invalid state', str(cm.exception)) + + check_bad_value(['bad_state', 'active']) + check_bad_value(['']) + check_bad_value('active') + + @with_tempdir + def test_get_shard_ranges(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + # no rows + self.assertFalse(broker.get_shard_ranges()) + # check that a default own shard range is not generated + self.assertFalse(broker.get_shard_ranges(include_own=True)) + + # merge row for own shard range + own_shard_range = ShardRange(broker.path, next(ts_iter), 'l', 'u', + state=ShardRange.SHARDING) + 
broker.merge_shard_ranges([own_shard_range]) + self.assertFalse(broker.get_shard_ranges()) + self.assertFalse(broker.get_shard_ranges(include_own=False)) + + actual = broker.get_shard_ranges(include_own=True) + self.assertEqual([dict(sr) for sr in [own_shard_range]], + [dict(sr) for sr in actual]) + + # merge rows for other shard ranges + shard_ranges = [ + ShardRange('.a/c0', next(ts_iter), 'a', 'c'), + ShardRange('.a/c1', next(ts_iter), 'c', 'd'), + ShardRange('.a/c2', next(ts_iter), 'd', 'f', + state=ShardRange.ACTIVE), + ShardRange('.a/c3', next(ts_iter), 'e', 'f', deleted=1, + state=ShardRange.SHARDED,), + ShardRange('.a/c4', next(ts_iter), 'f', 'h', + state=ShardRange.CREATED), + ShardRange('.a/c5', next(ts_iter), 'h', 'j', deleted=1) + ] + broker.merge_shard_ranges(shard_ranges) + actual = broker.get_shard_ranges() + undeleted = shard_ranges[:3] + shard_ranges[4:5] + self.assertEqual([dict(sr) for sr in undeleted], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(include_deleted=True) + self.assertEqual([dict(sr) for sr in shard_ranges], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(reverse=True) + self.assertEqual([dict(sr) for sr in reversed(undeleted)], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='c', end_marker='e') + self.assertEqual([dict(sr) for sr in shard_ranges[1:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='c', end_marker='e', + states=ShardRange.ACTIVE) + self.assertEqual([dict(sr) for sr in shard_ranges[2:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='e', end_marker='e') + self.assertFalse([dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(includes='f') + self.assertEqual([dict(sr) for sr in shard_ranges[2:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(includes='i') + self.assertFalse(actual) + + actual = broker.get_shard_ranges( + states=[ShardRange.CREATED, ShardRange.ACTIVE]) + self.assertEqual( + [dict(sr) for sr in [shard_ranges[2], shard_ranges[4]]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(exclude_states=ShardRange.CREATED) + self.assertEqual([dict(sr) for sr in shard_ranges[:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges( + exclude_states=[ShardRange.CREATED, ShardRange.ACTIVE]) + self.assertEqual([dict(sr) for sr in shard_ranges[:2]], + [dict(sr) for sr in actual]) + + # exclude_states takes precedence + actual = broker.get_shard_ranges( + states=ShardRange.CREATED, exclude_states=ShardRange.CREATED) + self.assertEqual([dict(sr) for sr in shard_ranges[:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=[ShardRange.CREATED], + exclude_states=[ShardRange.ACTIVE]) + self.assertEqual([dict(sr) for sr in shard_ranges[4:5]], + [dict(sr) for sr in actual]) + + # get everything + actual = broker.get_shard_ranges(include_own=True) + self.assertEqual([dict(sr) for sr in undeleted + [own_shard_range]], + [dict(sr) for sr in actual]) + + # get just own range + actual = broker.get_shard_ranges(include_own=True, exclude_others=True) + self.assertEqual([dict(sr) for sr in [own_shard_range]], + [dict(sr) for sr in actual]) + + # exclude_states overrides include_own + actual = broker.get_shard_ranges(include_own=True, + exclude_states=ShardRange.SHARDING, + exclude_others=True) + self.assertFalse(actual) + + # if you ask for nothing you'll get nothing + actual = broker.get_shard_ranges( + include_own=False, 
exclude_others=True) + self.assertFalse(actual) + + @with_tempdir + def test_get_shard_ranges_with_sharding_overlaps(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + shard_ranges = [ + ShardRange('.shards_a/c0', next(ts_iter), 'a', 'd', + state=ShardRange.ACTIVE), + ShardRange('.shards_a/c1_0', next(ts_iter), 'd', 'g', + state=ShardRange.CLEAVED), + ShardRange('.shards_a/c1_1', next(ts_iter), 'g', 'j', + state=ShardRange.CLEAVED), + ShardRange('.shards_a/c1_2', next(ts_iter), 'j', 'm', + state=ShardRange.CREATED), + ShardRange('.shards_a/c1', next(ts_iter), 'd', 'm', + state=ShardRange.SHARDING), + ShardRange('.shards_a/c2', next(ts_iter), 'm', '', + state=ShardRange.ACTIVE), + ] + broker.merge_shard_ranges( + random.sample(shard_ranges, len(shard_ranges))) + actual = broker.get_shard_ranges() + self.assertEqual([dict(sr) for sr in shard_ranges], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=SHARD_LISTING_STATES) + self.assertEqual( + [dict(sr) for sr in shard_ranges[:3] + shard_ranges[4:]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='e') + self.assertEqual([shard_ranges[1]], actual) + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='j') + self.assertEqual([shard_ranges[2]], actual) + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='k') + self.assertEqual([shard_ranges[3]], actual) + + @with_tempdir + def test_get_shard_ranges_with_shrinking_overlaps(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + shard_ranges = [ + ShardRange('.shards_a/c0', next(ts_iter), 'a', 'k', + state=ShardRange.ACTIVE), + ShardRange('.shards_a/c1', next(ts_iter), 'k', 'm', + state=ShardRange.SHRINKING), + ShardRange('.shards_a/c2', next(ts_iter), 'k', 't', + state=ShardRange.ACTIVE), + ShardRange('.shards_a/c3', next(ts_iter), 't', '', + state=ShardRange.ACTIVE), + ] + broker.merge_shard_ranges( + random.sample(shard_ranges, len(shard_ranges))) + actual = broker.get_shard_ranges() + self.assertEqual([dict(sr) for sr in shard_ranges], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='l') + self.assertEqual([shard_ranges[2]], actual) + + @with_tempdir + def test_get_own_shard_range(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='.shards_a', container='shard_c') + broker.initialize(next(ts_iter).internal, 0) + + # no row for own shard range - expect entire namespace default + now = Timestamp.now() + expected = ShardRange(broker.path, now, '', '', 0, 0, now, + state=ShardRange.ACTIVE) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + actual = broker.get_own_shard_range(no_default=True) + self.assertIsNone(actual) + + # row for own shard range and others + ts_1 = next(ts_iter) + own_sr = ShardRange(broker.path, ts_1, 'l', 'u') + broker.merge_shard_ranges( + [own_sr, + ShardRange('.a/c1', next(ts_iter), 'b', 'c'), + ShardRange('.a/c2', next(ts_iter), 'c', 'd')]) + expected = 
ShardRange(broker.path, ts_1, 'l', 'u', 0, 0, now) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + # check stats get updated + broker.put_object( + 'o1', next(ts_iter).internal, 100, 'text/plain', 'etag1') + broker.put_object( + 'o2', next(ts_iter).internal, 99, 'text/plain', 'etag2') + expected = ShardRange( + broker.path, ts_1, 'l', 'u', 2, 199, now) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + # still returned when deleted + delete_ts = next(ts_iter) + own_sr.set_deleted(timestamp=delete_ts) + broker.merge_shard_ranges(own_sr) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + expected = ShardRange( + broker.path, delete_ts, 'l', 'u', 2, 199, now, deleted=True) + self.assertEqual(dict(expected), dict(actual)) + + # still in table after reclaim_age + broker.reclaim(next(ts_iter).internal, next(ts_iter).internal) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + # entire namespace + ts_2 = next(ts_iter) + broker.merge_shard_ranges( + [ShardRange(broker.path, ts_2, '', '')]) + expected = ShardRange( + broker.path, ts_2, '', '', 2, 199, now) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + @with_tempdir + def test_enable_sharding(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='.shards_a', container='shard_c') + broker.initialize(next(ts_iter).internal, 0) + epoch = next(ts_iter) + broker.enable_sharding(epoch) + own_sr = broker.get_own_shard_range(no_default=True) + self.assertEqual(epoch, own_sr.epoch) + self.assertEqual(epoch, own_sr.state_timestamp) + self.assertEqual(ShardRange.SHARDING, own_sr.state) + + @with_tempdir + def test_get_shard_usage(self, tempdir): + ts_iter = make_timestamp_iter() + shard_range_by_state = dict( + (state, ShardRange('.shards_a/c_%s' % state, next(ts_iter), + str(state), str(state + 1), + 2 * state, 2 * state + 1, 2, + state=state)) + for state in ShardRange.STATES) + + def make_broker(a, c): + db_path = os.path.join(tempdir, '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account=a, container=c) + broker.initialize(next(ts_iter).internal, 0) + broker.set_sharding_sysmeta('Root', 'a/c') + broker.merge_shard_ranges(shard_range_by_state.values()) + return broker + + # make broker appear to be a root container + broker = make_broker('a', 'c') + self.assertTrue(broker.is_root_container()) + included_states = (ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING) + included = [shard_range_by_state[state] for state in included_states] + expected = { + 'object_count': sum([sr.object_count for sr in included]), + 'bytes_used': sum([sr.bytes_used for sr in included]) + } + self.assertEqual(expected, broker.get_shard_usage()) + + @with_tempdir + def _check_find_shard_ranges(self, c_lower, c_upper, tempdir): + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() + container_name = 'test_container' + + def do_test(expected_bounds, expected_last_found, shard_size, limit, + start_index=0, 
existing=None): + # expected_bounds is a list of tuples (lower, upper, object_count) + # build expected shard ranges + expected_shard_ranges = [ + dict(lower=lower, upper=upper, index=index, + object_count=object_count) + for index, (lower, upper, object_count) + in enumerate(expected_bounds, start_index)] + + with mock.patch('swift.common.utils.time.time', + return_value=float(ts_now.normal)): + ranges, last_found = broker.find_shard_ranges( + shard_size, limit=limit, existing_ranges=existing) + self.assertEqual(expected_shard_ranges, ranges) + self.assertEqual(expected_last_found, last_found) + + db_path = os.path.join(tempdir, 'test_container.db') + broker = ContainerBroker( + db_path, account='a', container=container_name) + # shard size > object count, no objects + broker.initialize(next(ts_iter).internal, 0) + + ts = next(ts_iter) + if c_lower or c_upper: + # testing a shard, so set its own shard range + own_shard_range = ShardRange(broker.path, ts, c_lower, c_upper) + broker.merge_shard_ranges([own_shard_range]) + + self.assertEqual(([], False), broker.find_shard_ranges(10)) + + for i in range(10): + broker.put_object( + 'obj%02d' % i, next(ts_iter).internal, 0, 'text/plain', 'etag') + + expected_bounds = [(c_lower, 'obj04', 5), ('obj04', c_upper, 5)] + do_test(expected_bounds, True, shard_size=5, limit=None) + + expected = [(c_lower, 'obj06', 7), ('obj06', c_upper, 3)] + do_test(expected, True, shard_size=7, limit=None) + expected = [(c_lower, 'obj08', 9), ('obj08', c_upper, 1)] + do_test(expected, True, shard_size=9, limit=None) + # shard size >= object count + do_test([], False, shard_size=10, limit=None) + do_test([], False, shard_size=11, limit=None) + + # check use of limit + do_test([], False, shard_size=4, limit=0) + expected = [(c_lower, 'obj03', 4)] + do_test(expected, False, shard_size=4, limit=1) + expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4)] + do_test(expected, False, shard_size=4, limit=2) + expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4), + ('obj07', c_upper, 2)] + do_test(expected, True, shard_size=4, limit=3) + do_test(expected, True, shard_size=4, limit=4) + do_test(expected, True, shard_size=4, limit=-1) + + # increase object count to 11 + broker.put_object( + 'obj10', next(ts_iter).internal, 0, 'text/plain', 'etag') + expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4), + ('obj07', c_upper, 3)] + do_test(expected, True, shard_size=4, limit=None) + + expected = [(c_lower, 'obj09', 10), ('obj09', c_upper, 1)] + do_test(expected, True, shard_size=10, limit=None) + do_test([], False, shard_size=11, limit=None) + + # now pass in a pre-existing shard range + existing = [ShardRange( + '.shards_a/srange-0', Timestamp.now(), '', 'obj03', + object_count=4, state=ShardRange.FOUND)] + + expected = [('obj03', 'obj07', 4), ('obj07', c_upper, 3)] + do_test(expected, True, shard_size=4, limit=None, start_index=1, + existing=existing) + expected = [('obj03', 'obj07', 4)] + do_test(expected, False, shard_size=4, limit=1, start_index=1, + existing=existing) + # using increased shard size should not distort estimation of progress + expected = [('obj03', 'obj09', 6), ('obj09', c_upper, 1)] + do_test(expected, True, shard_size=6, limit=None, start_index=1, + existing=existing) + + # add another existing... 
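# --- Illustrative sketch, not part of the patch: the arithmetic behind the
# expected_bounds used in the find_shard_ranges cases above. chunk_bounds is a
# hypothetical stand-in, not the broker's implementation; it only reproduces
# the (lower, upper, object_count) tuples for the no-limit, no-existing cases.
def chunk_bounds(names, shard_size, lower='', upper=''):
    # Cut a boundary after every shard_size names; whatever remains (at most
    # shard_size names) becomes the final range up to the container's own
    # upper bound. If shard_size covers everything, nothing is found.
    if shard_size >= len(names):
        return []
    bounds, start, i = [], lower, shard_size - 1
    while len(names) - (i + 1) > shard_size:
        bounds.append((start, names[i], shard_size))
        start, i = names[i], i + shard_size
    bounds.append((start, names[i], shard_size))
    bounds.append((names[i], upper, len(names) - (i + 1)))
    return bounds

# e.g. ten objects with shard_size=5 give [('', 'obj04', 5), ('obj04', '', 5)]
# and shard_size=7 gives [('', 'obj06', 7), ('obj06', '', 3)], matching the
# expected bounds asserted above for an unbounded (root) namespace.
objs = ['obj%02d' % i for i in range(10)]
assert chunk_bounds(objs, 5) == [('', 'obj04', 5), ('obj04', '', 5)]
assert chunk_bounds(objs, 7) == [('', 'obj06', 7), ('obj06', '', 3)]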
+ existing.append(ShardRange( + '.shards_a/srange-1', Timestamp.now(), '', 'obj07', + object_count=4, state=ShardRange.FOUND)) + expected = [('obj07', c_upper, 3)] + do_test(expected, True, shard_size=10, limit=None, start_index=2, + existing=existing) + # an existing shard range not in FOUND state should not distort + # estimation of progress, but may cause final range object count to + # default to shard_size + existing[-1].state = ShardRange.CREATED + existing[-1].object_count = 10 + # there's only 3 objects left to scan but progress cannot be reliably + # calculated, so final shard range has object count of 2 + expected = [('obj07', 'obj09', 2), ('obj09', c_upper, 2)] + do_test(expected, True, shard_size=2, limit=None, start_index=2, + existing=existing) + + # add last shard range so there's none left to find + existing.append(ShardRange( + '.shards_a/srange-2', Timestamp.now(), 'obj07', c_upper, + object_count=4, state=ShardRange.FOUND)) + do_test([], True, shard_size=4, limit=None, existing=existing) + + def test_find_shard_ranges(self): + self._check_find_shard_ranges('', '') + self._check_find_shard_ranges('', 'upper') + self._check_find_shard_ranges('lower', '') + self._check_find_shard_ranges('lower', 'upper') + + @with_tempdir + def test_find_shard_ranges_with_misplaced_objects(self, tempdir): + # verify that misplaced objects outside of a shard's range do not + # influence choice of shard ranges (but do distort the object counts) + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() + container_name = 'test_container' + + db_path = os.path.join(tempdir, 'test_container.db') + broker = ContainerBroker( + db_path, account='a', container=container_name) + # shard size > object count, no objects + broker.initialize(next(ts_iter).internal, 0) + + ts = next(ts_iter) + own_shard_range = ShardRange(broker.path, ts, 'l', 'u') + broker.merge_shard_ranges([own_shard_range]) + + self.assertEqual(([], False), broker.find_shard_ranges(10)) + + for name in ('a-misplaced', 'm', 'n', 'p', 'q', 'r', 'z-misplaced'): + broker.put_object( + name, next(ts_iter).internal, 0, 'text/plain', 'etag') + + expected_bounds = ( + ('l', 'n', 2), # contains m, n + ('n', 'q', 2), # contains p, q + ('q', 'u', 3) # contains r; object count distorted by 2 misplaced + ) + expected_shard_ranges = [ + dict(lower=lower, upper=upper, index=index, + object_count=object_count) + for index, (lower, upper, object_count) + in enumerate(expected_bounds)] + + with mock.patch('swift.common.utils.time.time', + return_value=float(ts_now.normal)): + actual_shard_ranges, last_found = broker.find_shard_ranges(2, -1) + self.assertEqual(expected_shard_ranges, actual_shard_ranges) + + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() + container_name = 'test_container' + + @with_tempdir + def test_find_shard_ranges_errors(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'test_container.db') + broker = ContainerBroker(db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + for i in range(2): + broker.put_object( + 'obj%d' % i, next(ts_iter).internal, 0, 'text/plain', 'etag') + + klass = 'swift.container.backend.ContainerBroker' + with mock.patch(klass + '._get_next_shard_range_upper', + side_effect=LockTimeout()): + ranges, last_found = broker.find_shard_ranges(1) + self.assertFalse(ranges) + self.assertFalse(last_found) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Problem finding shard upper', lines[0]) + 
self.assertFalse(lines[1:]) + + broker.logger.clear() + with mock.patch(klass + '._get_next_shard_range_upper', + side_effect=sqlite3.OperationalError()): + ranges, last_found = broker.find_shard_ranges(1) + self.assertFalse(last_found) + self.assertFalse(ranges) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Problem finding shard upper', lines[0]) + self.assertFalse(lines[1:]) + + @with_tempdir + def test_set_db_states(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + # load up the broker with some objects + objects = [{'name': 'obj_%d' % i, + 'created_at': next(ts_iter).normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': 0, + 'storage_policy_index': 0, + } for i in range(1, 6)] + # merge_items mutates items + broker.merge_items([dict(obj) for obj in objects]) + original_info = broker.get_info() + + # Add some metadata + meta = { + 'X-Container-Meta-Color': ['Blue', next(ts_iter).normal], + 'X-Container-Meta-Cleared': ['', next(ts_iter).normal], + 'X-Container-Sysmeta-Shape': ['Circle', next(ts_iter).normal], + } + broker.update_metadata(meta) + + # Add some syncs + incoming_sync = {'remote_id': 'incoming_123', 'sync_point': 1} + outgoing_sync = {'remote_id': 'outgoing_123', 'sync_point': 2} + broker.merge_syncs([outgoing_sync], incoming=False) + broker.merge_syncs([incoming_sync], incoming=True) + + # Add some ShardRanges + shard_ranges = [ShardRange( + name='.shards_a/shard_range_%s' % i, + timestamp=next(ts_iter), lower='obj_%d' % i, + upper='obj_%d' % (i + 2), + object_count=len(objects[i:i + 2]), + bytes_used=sum(obj['size'] for obj in objects[i:i + 2]), + meta_timestamp=next(ts_iter)) for i in range(0, 6, 2)] + deleted_range = ShardRange('.shards_a/shard_range_z', next(ts_iter), + 'z', '', state=ShardRange.SHARDED, + deleted=1) + own_sr = ShardRange(name='a/c', timestamp=next(ts_iter), + state=ShardRange.ACTIVE) + broker.merge_shard_ranges([own_sr] + shard_ranges + [deleted_range]) + ts_epoch = next(ts_iter) + new_db_path = os.path.join(tempdir, 'part', 'suffix', 'hash', + 'container_%s.db' % ts_epoch.normal) + + def check_broker_properties(broker): + # these broker properties should remain unchanged as state changes + self.assertEqual(broker.get_max_row(), 5) + all_metadata = broker.metadata + original_meta = dict((k, all_metadata[k]) for k in meta) + self.assertEqual(original_meta, meta) + self.assertEqual(broker.get_syncs(True)[0], incoming_sync) + self.assertEqual(broker.get_syncs(False)[0], outgoing_sync) + self.assertEqual(shard_ranges + [own_sr, deleted_range], + broker.get_shard_ranges(include_own=True, + include_deleted=True)) + + def check_broker_info(actual_info): + for key in ('db_state', 'id', 'hash'): + actual_info.pop(key, None) + original_info.pop(key, None) + self.assertEqual(original_info, actual_info) + + def check_unsharded_state(broker): + # these are expected properties in unsharded state + self.assertEqual(len(broker.get_brokers()), 1) + self.assertEqual(broker.get_db_state(), UNSHARDED) + self.assertTrue(os.path.exists(db_path)) + self.assertFalse(os.path.exists(new_db_path)) + self.assertEqual(objects, broker.get_objects()) + + # Sanity checks + check_broker_properties(broker) + check_unsharded_state(broker) + check_broker_info(broker.get_info()) + + # first test that moving from UNSHARDED to SHARDED 
doesn't work + self.assertFalse(broker.set_sharded_state()) + # check nothing changed + check_broker_properties(broker) + check_broker_info(broker.get_info()) + check_unsharded_state(broker) + + # cannot go to SHARDING without an epoch set + self.assertFalse(broker.set_sharding_state()) + + # now set sharding epoch and make sure everything moves. + broker.enable_sharding(ts_epoch) + self.assertTrue(broker.set_sharding_state()) + check_broker_properties(broker) + check_broker_info(broker.get_info()) + + def check_sharding_state(broker): + self.assertEqual(len(broker.get_brokers()), 2) + self.assertEqual(broker.get_db_state(), SHARDING) + self.assertTrue(os.path.exists(db_path)) + self.assertTrue(os.path.exists(new_db_path)) + self.assertEqual([], broker.get_objects()) + self.assertEqual(objects, broker.get_brokers()[0].get_objects()) + check_sharding_state(broker) + + # to confirm we're definitely looking at the shard db + broker2 = ContainerBroker(new_db_path) + check_broker_properties(broker2) + check_broker_info(broker2.get_info()) + self.assertEqual([], broker2.get_objects()) + + # Try to set sharding state again + self.assertFalse(broker.set_sharding_state()) + # check nothing changed + check_broker_properties(broker) + check_broker_info(broker.get_info()) + check_sharding_state(broker) + + # Now move to the final state - update shard ranges' state + broker.merge_shard_ranges( + [dict(sr, state=ShardRange.ACTIVE, + state_timestamp=next(ts_iter).internal) + for sr in shard_ranges]) + # pretend all ranges have been cleaved + self.assertTrue(broker.set_sharded_state()) + check_broker_properties(broker) + check_broker_info(broker.get_info()) + + def check_sharded_state(broker): + self.assertEqual(broker.get_db_state(), SHARDED) + self.assertEqual(len(broker.get_brokers()), 1) + self.assertFalse(os.path.exists(db_path)) + self.assertTrue(os.path.exists(new_db_path)) + self.assertEqual([], broker.get_objects()) + check_sharded_state(broker) + + # Try to set sharded state again + self.assertFalse(broker.set_sharded_state()) + # check nothing changed + check_broker_properties(broker) + check_broker_info(broker.get_info()) + check_sharded_state(broker) + + # delete the container - sharding sysmeta gets erased + broker.delete_db(next(ts_iter).internal) + # but it is not considered deleted while shards have content + self.assertFalse(broker.is_deleted()) + check_sharded_state(broker) + # empty the shard ranges + empty_shard_ranges = [sr.copy(object_count=0, bytes_used=0, + meta_timestamp=next(ts_iter)) + for sr in shard_ranges] + broker.merge_shard_ranges(empty_shard_ranges) + # and no it is deleted + self.assertTrue(broker.is_deleted()) + check_sharded_state(broker) + + def do_revive_shard_delete(shard_ranges): + # delete all shard ranges + deleted_shard_ranges = [sr.copy(timestamp=next(ts_iter), deleted=1) + for sr in shard_ranges] + broker.merge_shard_ranges(deleted_shard_ranges) + self.assertEqual(COLLAPSED, broker.get_db_state()) + + # add new shard ranges and go to sharding state - need to force + # broker time to be after the delete time in order to write new + # sysmeta + broker.enable_sharding(next(ts_iter)) + shard_ranges = [sr.copy(timestamp=next(ts_iter)) + for sr in shard_ranges] + broker.merge_shard_ranges(shard_ranges) + with mock.patch('swift.common.db.time.time', + lambda: float(next(ts_iter))): + self.assertTrue(broker.set_sharding_state()) + self.assertEqual(SHARDING, broker.get_db_state()) + + # go to sharded + self.assertTrue( + broker.set_sharded_state()) + 
self.assertEqual(SHARDED, broker.get_db_state()) + + # delete again + broker.delete_db(next(ts_iter).internal) + self.assertTrue(broker.is_deleted()) + self.assertEqual(SHARDED, broker.get_db_state()) + + do_revive_shard_delete(shard_ranges) + do_revive_shard_delete(shard_ranges) + + @with_tempdir + def test_set_sharding_state_errors(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + broker.enable_sharding(next(ts_iter)) + + orig_execute = GreenDBConnection.execute + trigger = 'INSERT into object' + + def mock_execute(conn, *args, **kwargs): + if trigger in args[0]: + raise sqlite3.OperationalError() + return orig_execute(conn, *args, **kwargs) + + with mock.patch('swift.common.db.GreenDBConnection.execute', + mock_execute): + res = broker.set_sharding_state() + self.assertFalse(res) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Failed to set the ROWID', lines[0]) + self.assertFalse(lines[1:]) + + broker.logger.clear() + trigger = 'UPDATE container_stat SET created_at' + with mock.patch('swift.common.db.GreenDBConnection.execute', + mock_execute): + res = broker.set_sharding_state() + self.assertFalse(res) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Failed to set matching', lines[0]) + self.assertFalse(lines[1:]) + + @with_tempdir + def test_set_sharded_state_errors(self, tempdir): + ts_iter = make_timestamp_iter() + retiring_db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(retiring_db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + pre_epoch = next(ts_iter) + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + # unlink fails + with mock.patch('os.unlink', side_effect=OSError(errno.EPERM)): + self.assertFalse(broker.set_sharded_state()) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Failed to unlink', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertTrue(os.path.exists(retiring_db_path)) + self.assertTrue(os.path.exists(broker.db_file)) + + # extra files + extra_filename = make_db_file_path(broker.db_file, pre_epoch) + self.assertNotEqual(extra_filename, broker.db_file) # sanity check + with open(extra_filename, 'wb'): + pass + broker.logger.clear() + self.assertFalse(broker.set_sharded_state()) + lines = broker.logger.get_lines_for_level('warning') + self.assertIn('Still have multiple db files', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(broker.logger.get_lines_for_level('error')) + self.assertTrue(os.path.exists(retiring_db_path)) + self.assertTrue(os.path.exists(broker.db_file)) + + # retiring file missing + broker.logger.clear() + os.unlink(retiring_db_path) + self.assertFalse(broker.set_sharded_state()) + lines = broker.logger.get_lines_for_level('warning') + self.assertIn('Refusing to delete', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(broker.logger.get_lines_for_level('error')) + self.assertTrue(os.path.exists(broker.db_file)) + + @with_tempdir + def test_get_brokers(self, tempdir): + ts_iter = make_timestamp_iter() + retiring_db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(retiring_db_path, 
account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + brokers = broker.get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertFalse(brokers[0].skip_commits) + self.assertFalse(brokers[1:]) + + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + brokers = broker.get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertTrue(brokers[0].skip_commits) + self.assertEqual(broker.db_file, brokers[1].db_file) + self.assertFalse(brokers[1].skip_commits) + self.assertFalse(brokers[2:]) + + # same outcome when called on retiring db broker + brokers = brokers[0].get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertTrue(brokers[0].skip_commits) + self.assertEqual(broker.db_file, brokers[1].db_file) + self.assertFalse(brokers[1].skip_commits) + self.assertFalse(brokers[2:]) + + self.assertTrue(broker.set_sharded_state()) + brokers = broker.get_brokers() + self.assertEqual(broker.db_file, brokers[0].db_file) + self.assertFalse(brokers[0].skip_commits) + self.assertFalse(brokers[1:]) + + # unexpected extra file should be ignored + with open(retiring_db_path, 'wb'): + pass + retiring_db_path = broker.db_file + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + broker.reload_db_files() + self.assertEqual(3, len(broker.db_files)) # sanity check + brokers = broker.get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertTrue(brokers[0].skip_commits) + self.assertEqual(broker.db_file, brokers[1].db_file) + self.assertFalse(brokers[1].skip_commits) + self.assertFalse(brokers[2:]) + lines = broker.logger.get_lines_for_level('warning') + self.assertIn('Unexpected db files', lines[0]) + self.assertFalse(lines[1:]) + + @with_tempdir + def test_merge_shard_ranges(self, tempdir): + ts_iter = make_timestamp_iter() + ts = [next(ts_iter) for _ in range(13)] + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker( + db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + # sanity check + self.assertFalse(broker.get_shard_ranges(include_deleted=True)) + + broker.merge_shard_ranges(None) + self.assertFalse(broker.get_shard_ranges(include_deleted=True)) + + # merge item at ts1 + # sr___ + sr_b_1_1 = ShardRange('a/c_b', ts[1], lower='a', upper='b', + object_count=2) + broker.merge_shard_ranges([sr_b_1_1]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + + # merge older item - ignored + sr_b_0_0 = ShardRange('a/c_b', ts[0], lower='a', upper='b', + object_count=1) + broker.merge_shard_ranges([sr_b_0_0]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + + # merge same timestamp - ignored + broker.merge_shard_ranges([dict(sr_b_1_1, lower='', upper='c')]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + broker.merge_shard_ranges([dict(sr_b_1_1, object_count=99)]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + + # merge list with older item *after* newer item + sr_c_2_2 = ShardRange('a/c_c', ts[2], lower='b', upper='c', + object_count=3) + sr_c_3_3 = ShardRange('a/c_c', ts[3], lower='b', upper='c', + object_count=4) + broker.merge_shard_ranges([sr_c_3_3, sr_c_2_2]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_3_3]) + + # merge newer item - updated + sr_c_5_5 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=5) + broker.merge_shard_ranges([sr_c_5_5]) + self._assert_shard_ranges(broker, [sr_b_1_1, 
sr_c_5_5]) + + # merge older metadata item - ignored + sr_c_5_4 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=6, meta_timestamp=ts[4]) + broker.merge_shard_ranges([sr_c_5_4]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_5]) + + # merge newer metadata item - only metadata is updated + sr_c_5_6 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=7, meta_timestamp=ts[6]) + broker.merge_shard_ranges([dict(sr_c_5_6, lower='', upper='d')]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_6]) + + # merge older created_at, newer metadata item - ignored + sr_c_4_7 = ShardRange('a/c_c', ts[4], lower='b', upper='c', + object_count=8, meta_timestamp=ts[7]) + broker.merge_shard_ranges([sr_c_4_7]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_6]) + + # merge list with older metadata item *after* newer metadata item + sr_c_5_11 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=9, meta_timestamp=ts[11]) + broker.merge_shard_ranges([sr_c_5_11, sr_c_5_6]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + + # deleted item at *same timestamp* as existing - deleted ignored + broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1, object_count=0)]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + sr_b_1_1.meta_timestamp = ts[11] + broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1)]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + sr_b_1_1.state_timestamp = ts[11] + broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1)]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + + # delete item at *newer timestamp* - updated + sr_b_2_2_deleted = ShardRange('a/c_b', ts[2], lower='a', upper='b', + object_count=0, deleted=1) + broker.merge_shard_ranges([sr_b_2_2_deleted]) + self._assert_shard_ranges(broker, [sr_b_2_2_deleted, sr_c_5_11]) + + # merge list with older undeleted item *after* newer deleted item + # NB deleted timestamp trumps newer meta timestamp + sr_c_9_12 = ShardRange('a/c_c', ts[9], lower='b', upper='c', + object_count=10, meta_timestamp=ts[12]) + sr_c_10_10_deleted = ShardRange('a/c_c', ts[10], lower='b', upper='c', + object_count=0, deleted=1) + broker.merge_shard_ranges([sr_c_10_10_deleted, sr_c_9_12]) + self._assert_shard_ranges( + broker, [sr_b_2_2_deleted, sr_c_10_10_deleted]) + + @with_tempdir + def test_merge_shard_ranges_state(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + expected_shard_ranges = [] + + def do_test(orig_state, orig_timestamp, test_state, test_timestamp, + expected_state, expected_timestamp): + index = len(expected_shard_ranges) + sr = ShardRange('a/%s' % index, orig_timestamp, '%03d' % index, + '%03d' % (index + 1), state=orig_state) + broker.merge_shard_ranges([sr]) + sr.state = test_state + sr.state_timestamp = test_timestamp + broker.merge_shard_ranges([sr]) + sr.state = expected_state + sr.state_timestamp = expected_timestamp + expected_shard_ranges.append(sr) + self._assert_shard_ranges(broker, expected_shard_ranges) + + # state at older state_timestamp is not merged + for orig_state in ShardRange.STATES: + for test_state in ShardRange.STATES: + ts_older = next(ts_iter) + ts = next(ts_iter) + do_test(orig_state, ts, test_state, ts_older, orig_state, ts) + + # more advanced state at same timestamp is merged + for orig_state in ShardRange.STATES: + for test_state in 
ShardRange.STATES: + ts = next(ts_iter) + do_test(orig_state, ts, test_state, ts, + test_state if test_state > orig_state else orig_state, + ts) + + # any state at newer timestamp is merged + for orig_state in ShardRange.STATES: + for test_state in ShardRange.STATES: + ts = next(ts_iter) + ts_newer = next(ts_iter) + do_test(orig_state, ts, test_state, ts_newer, test_state, + ts_newer) + + def _check_object_stats_when_sharded(self, a, c, root_a, root_c, tempdir): + # common setup and assertions for root and shard containers + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker( + db_path, account=a, container=c) + broker.initialize(next(ts_iter).internal, 0) + broker.set_sharding_sysmeta('Root', '%s/%s' % (root_a, root_c)) + broker.merge_items([{'name': 'obj', 'size': 14, 'etag': 'blah', + 'content_type': 'text/plain', 'deleted': 0, + 'created_at': Timestamp.now().internal}]) + self.assertEqual(1, broker.get_info()['object_count']) + self.assertEqual(14, broker.get_info()['bytes_used']) + + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + sr_1 = ShardRange( + '%s/%s1' % (root_a, root_c), Timestamp.now(), lower='', upper='m', + object_count=99, bytes_used=999, state=ShardRange.ACTIVE) + sr_2 = ShardRange( + '%s/%s2' % (root_a, root_c), Timestamp.now(), lower='m', upper='', + object_count=21, bytes_used=1000, state=ShardRange.ACTIVE) + broker.merge_shard_ranges([sr_1, sr_2]) + self.assertEqual(1, broker.get_info()['object_count']) + self.assertEqual(14, broker.get_info()['bytes_used']) + return broker + + @with_tempdir + def test_object_stats_root_container(self, tempdir): + broker = self._check_object_stats_when_sharded( + 'a', 'c', 'a', 'c', tempdir) + self.assertTrue(broker.is_root_container()) # sanity + self.assertTrue(broker.set_sharded_state()) + self.assertEqual(120, broker.get_info()['object_count']) + self.assertEqual(1999, broker.get_info()['bytes_used']) + + @with_tempdir + def test_object_stats_shard_container(self, tempdir): + broker = self._check_object_stats_when_sharded( + '.shard_a', 'c-blah', 'a', 'c', tempdir) + self.assertFalse(broker.is_root_container()) # sanity + self.assertTrue(broker.set_sharded_state()) + self.assertEqual(0, broker.get_info()['object_count']) + self.assertEqual(0, broker.get_info()['bytes_used']) + class TestCommonContainerBroker(test_db.TestExampleBroker): @@ -2132,6 +4477,8 @@ class ContainerBrokerMigrationMixin(object): ContainerBroker.create_object_table ContainerBroker.create_object_table = \ prespi_create_object_table + self._imported_create_shard_ranges_table = \ + ContainerBroker.create_shard_range_table self._imported_create_container_info_table = \ ContainerBroker.create_container_info_table ContainerBroker.create_container_info_table = \ @@ -2156,6 +4503,8 @@ class ContainerBrokerMigrationMixin(object): self._imported_create_container_info_table ContainerBroker.create_object_table = \ self._imported_create_object_table + ContainerBroker.create_shard_range_table = \ + self._imported_create_shard_ranges_table ContainerBroker.create_policy_stat_table = \ self._imported_create_policy_stat_table @@ -2209,6 +4558,8 @@ class TestContainerBrokerBeforeMetadata(ContainerBrokerMigrationMixin, Tests for ContainerBroker against databases created before the metadata column was added. 
""" + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat', 'shard_range'} def setUp(self): super(TestContainerBrokerBeforeMetadata, self).setUp() @@ -2281,6 +4632,8 @@ class TestContainerBrokerBeforeXSync(ContainerBrokerMigrationMixin, Tests for ContainerBroker against databases created before the x_container_sync_point[12] columns were added. """ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat', 'shard_range'} def setUp(self): super(TestContainerBrokerBeforeXSync, self).setUp() @@ -2395,6 +4748,8 @@ class TestContainerBrokerBeforeSPI(ContainerBrokerMigrationMixin, Tests for ContainerBroker against databases created before the storage_policy_index column was added. """ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat', 'shard_range'} def setUp(self): super(TestContainerBrokerBeforeSPI, self).setUp() @@ -2599,6 +4954,48 @@ class TestContainerBrokerBeforeSPI(ContainerBrokerMigrationMixin, self.assertEqual(info['bytes_used'], 456) +class TestContainerBrokerBeforeShardRanges(ContainerBrokerMigrationMixin, + TestContainerBroker): + """ + Tests for ContainerBroker against databases created + before the shard_ranges table was added. + """ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat'} + + class Override(object): + def __init__(self, func): + self.func = func + + def __get__(self, obj, obj_type): + if inspect.stack()[1][3] == '_initialize': + return lambda *a, **kw: None + return self.func.__get__(obj, obj_type) + + def setUp(self): + super(TestContainerBrokerBeforeShardRanges, self).setUp() + ContainerBroker.create_shard_range_table = self.Override( + ContainerBroker.create_shard_range_table) + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + exc = None + with broker.get() as conn: + try: + conn.execute('''SELECT * + FROM shard_range''') + except BaseException as err: + exc = err + self.assertTrue('no such table: shard_range' in str(exc)) + + def tearDown(self): + super(TestContainerBrokerBeforeShardRanges, self).tearDown() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + with broker.get() as conn: + conn.execute('''SELECT * + FROM shard_range''') + + class TestUpdateNewItemFromExisting(unittest.TestCase): # TODO: add test scenarios that have swift_bytes in content_type t0 = '1234567890.00000' diff --git a/test/unit/container/test_replicator.py b/test/unit/container/test_replicator.py index ff63a2992c..23f06ddc97 100644 --- a/test/unit/container/test_replicator.py +++ b/test/unit/container/test_replicator.py @@ -26,13 +26,17 @@ from swift.common import db_replicator from swift.container import replicator, backend, server, sync_store from swift.container.reconciler import ( MISPLACED_OBJECTS_ACCOUNT, get_reconciler_container_name) -from swift.common.utils import Timestamp, encode_timestamps +from swift.common.utils import Timestamp, encode_timestamps, ShardRange, \ + get_db_files, make_db_file_path from swift.common.storage_policy import POLICIES from test.unit.common import test_db_replicator -from test.unit import patch_policies, make_timestamp_iter, mock_check_drive +from test.unit import patch_policies, make_timestamp_iter, mock_check_drive, \ + debug_logger from contextlib import contextmanager +from test.unit.common.test_db_replicator 
import attach_fake_replication_rpc + @patch_policies class TestReplicatorSync(test_db_replicator.TestReplicatorSync): @@ -42,6 +46,16 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync): replicator_daemon = replicator.ContainerReplicator replicator_rpc = replicator.ContainerReplicatorRpc + def assertShardRangesEqual(self, x, y): + # ShardRange.__eq__ only compares lower and upper; here we generate + # dict representations to compare all attributes + self.assertEqual([dict(sr) for sr in x], [dict(sr) for sr in y]) + + def assertShardRangesNotEqual(self, x, y): + # ShardRange.__eq__ only compares lower and upper; here we generate + # dict representations to compare all attributes + self.assertNotEqual([dict(sr) for sr in x], [dict(sr) for sr in y]) + def test_report_up_to_date(self): broker = self._get_broker('a', 'c', node_index=0) broker.initialize(Timestamp(1).internal, int(POLICIES.default)) @@ -1148,6 +1162,1037 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync): self.assertEqual(1, mock_remove.call_count) self.assertEqual(broker_2.db_file, mock_remove.call_args[0][0].db_file) + def test_cleanup_post_replicate(self): + broker = self._get_broker('a', 'c', node_index=0) + put_timestamp = Timestamp.now() + broker.initialize(put_timestamp.internal, POLICIES.default.idx) + orig_info = broker.get_replication_info() + daemon = replicator.ContainerReplicator({}, logger=self.logger) + + # db should not be here, replication ok, deleted + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertFalse(os.path.exists(broker.db_file)) + self.assertEqual(['Successfully deleted db %s' % broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # failed replication, not deleted + broker.initialize(put_timestamp.internal, POLICIES.default.idx) + orig_info = broker.get_replication_info() + res = daemon.cleanup_post_replicate(broker, orig_info, + [False, True, True]) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db has shard ranges, not deleted + broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges( + [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')]) + self.assertTrue(broker.sharding_required()) # sanity check + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual( + ['Not deleting db %s (requires sharding, state unsharded)' % + broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db sharding, not deleted + self._goto_sharding_state(broker, Timestamp.now()) + self.assertTrue(broker.sharding_required()) # sanity check + orig_info = broker.get_replication_info() + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual( + ['Not deleting db %s (requires sharding, state sharding)' % + broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db sharded, should not be here, failed replication, not deleted + self._goto_sharded_state(broker) + self.assertFalse(broker.sharding_required()) # sanity check + res = daemon.cleanup_post_replicate(broker, orig_info, + [True, False, True]) + self.assertTrue(res) + 
self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual(['Not deleting db %s (2/3 success)' % + broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db sharded, should not be here, new shard ranges (e.g. from reverse + # replication), deleted + broker.merge_shard_ranges( + [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')]) + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertFalse(os.path.exists(broker.db_file)) + daemon.logger.clear() + + # db sharded, should not be here, replication ok, deleted + broker.initialize(put_timestamp.internal, POLICIES.default.idx) + self.assertTrue(os.path.exists(broker.db_file)) + orig_info = broker.get_replication_info() + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertFalse(os.path.exists(broker.db_file)) + self.assertEqual(['Successfully deleted db %s' % broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + def test_sync_shard_ranges(self): + put_timestamp = Timestamp.now().internal + # create "local" broker + broker = self._get_broker('a', 'c', node_index=0) + broker.initialize(put_timestamp, POLICIES.default.idx) + # create "remote" broker + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_broker.initialize(put_timestamp, POLICIES.default.idx) + + def check_replicate(expected_shard_ranges, from_broker, to_broker): + daemon = replicator.ContainerReplicator({}) + part, node = self._get_broker_part_node(to_broker) + info = broker.get_replication_info() + success = daemon._repl_to_node(node, from_broker, part, info) + self.assertTrue(success) + self.assertEqual( + expected_shard_ranges, + to_broker.get_all_shard_range_data() + ) + self.assertEqual(1, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['rsync']) + self.assertEqual(0, daemon.stats['diff']) + local_info = self._get_broker( + 'a', 'c', node_index=0).get_info() + remote_info = self._get_broker( + 'a', 'c', node_index=1).get_info() + for k, v in local_info.items(): + if k == 'id': + continue + self.assertEqual(remote_info[k], v, + "mismatch remote %s %r != %r" % ( + k, remote_info[k], v)) + + bounds = (('', 'g'), ('g', 'r'), ('r', '')) + shard_ranges = [ + ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower, + upper, i + 1, 10 * (i + 1)) + for i, (lower, upper) in enumerate(bounds) + ] + # add first two shard_ranges to both brokers + for shard_range in shard_ranges[:2]: + for db in (broker, remote_broker): + db.merge_shard_ranges(shard_range) + # now add a shard range to the "local" broker only + own_sr = broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges(shard_ranges[2]) + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # update one shard range + shard_ranges[1].update_meta(99, 0) + broker.merge_shard_ranges(shard_ranges[1]) + # sanity check + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # delete one shard range + shard_ranges[0].deleted = 1 + shard_ranges[0].timestamp = Timestamp.now() + broker.merge_shard_ranges(shard_ranges[0]) + # sanity check + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + 
check_replicate(broker_ranges, broker, remote_broker) + + # put a shard range again + shard_ranges[2].timestamp = Timestamp.now() + shard_ranges[2].object_count = 0 + broker.merge_shard_ranges(shard_ranges[2]) + # sanity check + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # update same shard range on local and remote, remote later + shard_ranges[-1].meta_timestamp = Timestamp.now() + shard_ranges[-1].bytes_used += 1000 + broker.merge_shard_ranges(shard_ranges[-1]) + remote_shard_ranges = remote_broker.get_shard_ranges( + include_deleted=True) + remote_shard_ranges[-1].meta_timestamp = Timestamp.now() + remote_shard_ranges[-1].bytes_used += 2000 + remote_broker.merge_shard_ranges(remote_shard_ranges[-1]) + # sanity check + remote_broker_ranges = remote_broker.get_all_shard_range_data() + self.assertShardRangesEqual(remote_shard_ranges + [own_sr], + remote_broker_ranges) + self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges) + check_replicate(remote_broker_ranges, broker, remote_broker) + + # undelete shard range *on the remote* + deleted_ranges = [sr for sr in remote_shard_ranges if sr.deleted] + self.assertEqual([shard_ranges[0]], deleted_ranges) + deleted_ranges[0].deleted = 0 + deleted_ranges[0].timestamp = Timestamp.now() + remote_broker.merge_shard_ranges(deleted_ranges[0]) + # sanity check + remote_broker_ranges = remote_broker.get_all_shard_range_data() + self.assertShardRangesEqual(remote_shard_ranges + [own_sr], + remote_broker_ranges) + self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges) + check_replicate(remote_broker_ranges, broker, remote_broker) + + # reverse replication direction and expect syncs to propagate + check_replicate(remote_broker_ranges, remote_broker, broker) + + def test_sync_shard_ranges_with_rsync(self): + broker = self._get_broker('a', 'c', node_index=0) + put_timestamp = time.time() + broker.initialize(put_timestamp, POLICIES.default.idx) + + bounds = (('', 'g'), ('g', 'r'), ('r', '')) + shard_ranges = [ + ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower, + upper, i + 1, 10 * (i + 1)) + for i, (lower, upper) in enumerate(bounds) + ] + # add first shard range + own_sr = broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges(shard_ranges[:1]) + + # "replicate" + part, node = self._get_broker_part_node(broker) + daemon = self._run_once(node) + self.assertEqual(2, daemon.stats['rsync']) + + # complete rsync to all other nodes + def check_replicate(expected_ranges): + for i in range(1, 3): + remote_broker = self._get_broker('a', 'c', node_index=i) + self.assertTrue(os.path.exists(remote_broker.db_file)) + self.assertShardRangesEqual( + expected_ranges, + remote_broker.get_shard_ranges(include_deleted=True, + include_own=True) + ) + remote_info = remote_broker.get_info() + local_info = self._get_broker( + 'a', 'c', node_index=0).get_info() + for k, v in local_info.items(): + if k == 'id': + continue + if k == 'hash': + self.assertEqual(remote_info[k], '0' * 32) + continue + if k == 'object_count': + self.assertEqual(remote_info[k], 0) + continue + self.assertEqual(remote_info[k], v, + "mismatch remote %s %r != %r" % ( + k, remote_info[k], v)) + + check_replicate([shard_ranges[0], own_sr]) + + # delete and add some more shard ranges + shard_ranges[0].deleted = 1 + shard_ranges[0].timestamp = Timestamp.now() + for shard_range in shard_ranges: + broker.merge_shard_ranges(shard_range) 
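+ # the remote dbs already exist after the rsync above, so this second + # replication pass is expected to sync just the updated shard range + # rows (see the stats['deferred'] assertion below)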
+ daemon = self._run_once(node) + self.assertEqual(2, daemon.stats['deferred']) + check_replicate(shard_ranges + [own_sr]) + + def check_replicate(self, from_broker, remote_node_index, repl_conf=None, + expect_success=True, errors=None): + repl_conf = repl_conf or {} + repl_calls = [] + rsync_calls = [] + + def repl_hook(op, *sync_args): + repl_calls.append((op, sync_args)) + + fake_repl_connection = attach_fake_replication_rpc( + self.rpc, replicate_hook=repl_hook, errors=errors) + db_replicator.ReplConnection = fake_repl_connection + daemon = replicator.ContainerReplicator( + repl_conf, logger=debug_logger()) + self._install_fake_rsync_file(daemon, rsync_calls) + part, nodes = self._ring.get_nodes(from_broker.account, + from_broker.container) + + def find_node(node_index): + for node in nodes: + if node['index'] == node_index: + return node + else: + self.fail('Failed to find node index %s' % remote_node_index) + + remote_node = find_node(remote_node_index) + info = from_broker.get_replication_info() + success = daemon._repl_to_node(remote_node, from_broker, part, info) + self.assertEqual(expect_success, success) + return daemon, repl_calls, rsync_calls + + def assert_synced_shard_ranges(self, expected, synced_items): + expected.sort(key=lambda sr: (sr.lower, sr.upper)) + for item in synced_items: + item.pop('record_type', None) + self.assertEqual([dict(ex) for ex in expected], synced_items) + + def assert_info_synced(self, local, remote_node_index, mismatches=None): + mismatches = mismatches or [] + mismatches.append('id') + remote = self._get_broker(local.account, local.container, + node_index=remote_node_index) + local_info = local.get_info() + remote_info = remote.get_info() + errors = [] + for k, v in local_info.items(): + if remote_info.get(k) == v: + if k in mismatches: + errors.append( + "unexpected match remote %s %r == %r" % ( + k, remote_info[k], v)) + continue + else: + if k not in mismatches: + errors.append( + "unexpected mismatch remote %s %r != %r" % ( + k, remote_info[k], v)) + if errors: + self.fail('Found sync errors:\n' + '\n'.join(errors)) + + def assert_shard_ranges_synced(self, local_broker, remote_broker): + self.assertShardRangesEqual( + local_broker.get_shard_ranges(include_deleted=True, + include_own=True), + remote_broker.get_shard_ranges(include_deleted=True, + include_own=True) + ) + + def _setup_replication_test(self, node_index): + ts_iter = make_timestamp_iter() + policy_idx = POLICIES.default.idx + put_timestamp = Timestamp.now().internal + # create "local" broker + broker = self._get_broker('a', 'c', node_index=node_index) + broker.initialize(put_timestamp, policy_idx) + + objs = [{'name': 'blah%03d' % i, 'created_at': next(ts_iter).internal, + 'size': i, 'content_type': 'text/plain', 'etag': 'etag%s' % i, + 'deleted': 0, 'storage_policy_index': policy_idx} + for i in range(20)] + bounds = (('', 'a'), ('a', 'b'), ('b', 'c'), ('c', '')) + shard_ranges = [ + ShardRange( + '.sharded_a/sr-%s' % upper, Timestamp.now(), lower, upper) + for i, (lower, upper) in enumerate(bounds) + ] + return {'broker': broker, + 'objects': objs, + 'shard_ranges': shard_ranges} + + def _merge_object(self, broker, objects, index, **kwargs): + if not isinstance(index, slice): + index = slice(index, index + 1) + objs = [dict(obj) for obj in objects[index]] + broker.merge_items(objs) + + def _merge_shard_range(self, broker, shard_ranges, index, **kwargs): + broker.merge_shard_ranges(shard_ranges[index:index + 1]) + + def _goto_sharding_state(self, broker, epoch): + 
broker.enable_sharding(epoch) + self.assertTrue(broker.set_sharding_state()) + self.assertEqual(backend.SHARDING, broker.get_db_state()) + + def _goto_sharded_state(self, broker): + self.assertTrue(broker.set_sharded_state()) + self.assertEqual(backend.SHARDED, broker.get_db_state()) + + def _assert_local_sharded_in_sync(self, local_broker, local_id): + daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1) + self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'], + [call[0] for call in repl_calls]) + self.assertEqual(1, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['rsync']) + self.assertEqual(0, daemon.stats['diff']) + self.assertFalse(rsync_calls) + # new db sync + self.assertEqual(local_id, repl_calls[0][1][2]) + # ...but we still get a merge_shard_ranges for shard ranges + self.assert_synced_shard_ranges( + local_broker.get_shard_ranges(include_own=True), + repl_calls[2][1][0]) + self.assertEqual(local_id, repl_calls[2][1][1]) + + def _check_only_shard_ranges_replicated(self, local_broker, + remote_node_index, + repl_conf, + expected_shard_ranges, + expect_success=True): + # expected_shard_ranges is expected final list of sync'd ranges + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, remote_node_index, repl_conf, + expect_success=expect_success) + + # we always expect only shard ranges to end in abort + self.assertEqual(1, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['diff']) + self.assertEqual(0, daemon.stats['rsync']) + self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'], + [call[0] for call in repl_calls]) + self.assertFalse(rsync_calls) + # sync + local_id = local_broker.get_info()['id'] + self.assertEqual(local_id, repl_calls[0][1][2]) + # get_shard_ranges + self.assertEqual((), repl_calls[1][1]) + # merge_shard_ranges for sending local shard ranges + self.assertShardRangesEqual(expected_shard_ranges, repl_calls[2][1][0]) + self.assertEqual(local_id, repl_calls[2][1][1]) + remote_broker = self._get_broker( + local_broker.account, local_broker.container, node_index=1) + self.assertNotEqual(local_id, remote_broker.get_info()['id']) + self.assert_shard_ranges_synced(remote_broker, local_broker) + + def test_replication_local_unsharded_remote_missing(self): + context = self._setup_replication_test(0) + local_broker = context['broker'] + local_id = local_broker.get_info()['id'] + objs = context['objects'] + self._merge_object(index=0, **context) + + daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1) + + self.assert_info_synced(local_broker, 1) + self.assertEqual(1, daemon.stats['rsync']) + self.assertEqual(['sync', 'complete_rsync'], + [call[0] for call in repl_calls]) + self.assertEqual(local_id, repl_calls[1][1][0]) + self.assertEqual(os.path.basename(local_broker.db_file), + repl_calls[1][1][1]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_id, os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assert_shard_ranges_synced(local_broker, remote_broker) + self.assertTrue(os.path.exists(remote_broker._db_file)) + self.assertNotEqual(local_id, remote_broker.get_info()['id']) + self.assertEqual(objs[:1], remote_broker.get_objects()) + + def _check_replication_local_unsharded_remote_sharded(self, repl_conf): + context = self._setup_replication_test(0) + local_broker = context['broker'] + local_id = local_broker.get_info()['id'] + 
self._merge_object(index=slice(0, 6), **context) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=4, **remote_context) + remote_broker = remote_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(remote_broker, epoch=epoch) + remote_context['shard_ranges'][0].object_count = 101 + remote_context['shard_ranges'][0].bytes_used = 1010 + remote_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=5, **remote_context) + self._goto_sharded_state(remote_broker) + self.assertEqual(backend.SHARDED, remote_broker.get_db_state()) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True)) + + remote_broker = self._get_broker( + local_broker.account, local_broker.container, node_index=1) + self.assertEqual(backend.SHARDED, remote_broker.get_db_state()) + self.assertFalse(os.path.exists(remote_broker._db_file)) + self.assertNotEqual(local_id, remote_broker.get_info()['id']) + self.assertEqual(remote_context['objects'][5:6], + remote_broker.get_objects()) + + # Now that we have shard ranges, we're never considered in-sync :-/ + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True)) + + def test_replication_local_unsharded_remote_sharded(self): + self._check_replication_local_unsharded_remote_sharded({}) + + def test_replication_local_unsharded_remote_sharded_large_diff(self): + self._check_replication_local_unsharded_remote_sharded({'per_diff': 1}) + + def _check_replication_local_sharding_remote_missing(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=0, **local_context) + self._merge_object(index=1, **local_context) + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(2, 8), **local_context) + objs = local_context['objects'] + + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, repl_conf=repl_conf) + + self.assertEqual(['sync', 'complete_rsync'], + [call[0] for call in repl_calls]) + self.assertEqual(1, daemon.stats['rsync']) + self.assertEqual(0, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['diff']) + + # fresh db is sync'd first... 
+ fresh_id = local_broker.get_info()['id'] + self.assertEqual(fresh_id, repl_calls[0][1][2]) + self.assertEqual(fresh_id, repl_calls[1][1][0]) + # retired db is not sync'd at all + old_broker = self.backend( + local_broker._db_file, account=local_broker.account, + container=local_broker.container, force_db_file=True) + old_id = old_broker.get_info()['id'] + bad_calls = [] + for call in repl_calls: + if old_id in call[1]: + bad_calls.append( + 'old db id %r in %r call args %r' % ( + old_id, call[0], call[1])) + if bad_calls: + self.fail('Found some bad calls:\n' + '\n'.join(bad_calls)) + # complete_rsync + self.assertEqual(os.path.basename(local_broker.db_file), + repl_calls[1][1][1]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(fresh_id, os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + # TODO: make these stats better; in sharding state local broker pulls + # stats for 2 objects from old db, whereas remote thinks it's sharded + # and has an empty shard range table + self.assert_info_synced(local_broker, 1, mismatches=[ + 'object_count', 'bytes_used', 'db_state']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_id = remote_broker.get_info()['id'] + self.assertNotEqual(old_id, remote_id) + self.assertNotEqual(fresh_id, remote_id) + self.assertEqual( + [remote_broker.db_file], get_db_files(remote_broker.db_file)) + self.assertEqual(os.path.basename(remote_broker.db_file), + os.path.basename(local_broker.db_file)) + self.assertEqual(epoch, remote_broker.db_epoch) + # remote db has only the misplaced objects + self.assertEqual(objs[2:8], remote_broker.get_objects()) + self.assert_shard_ranges_synced(local_broker, remote_broker) + + # replicate again, check asserts abort + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + # sanity + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual( + [remote_broker.db_file], get_db_files(remote_broker.db_file)) + self.assertEqual(os.path.basename(remote_broker.db_file), + os.path.basename(local_broker.db_file)) + self.assertEqual(objs[2:8], remote_broker.get_objects()) + self.assertEqual(epoch, remote_broker.db_epoch) + + def test_replication_local_sharding_remote_missing(self): + self._check_replication_local_sharding_remote_missing({}) + + def test_replication_local_sharding_remote_missing_large_diff(self): + # the local shard db has large diff with respect to the old db + self._check_replication_local_sharding_remote_missing({'per_diff': 1}) + + def _check_replication_local_sharding_remote_unsharded(self, repl_conf): + local_context = self._setup_replication_test(0) + self._merge_object(index=slice(0, 3), **local_context) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(3, 11), **local_context) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=11, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual( + [remote_broker._db_file], get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][11:12], + remote_broker.get_objects()) + + self.assert_info_synced( + local_broker, 1, + mismatches=['db_state', 
'object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + def test_replication_local_sharding_remote_unsharded(self): + self._check_replication_local_sharding_remote_unsharded({}) + + def test_replication_local_sharding_remote_unsharded_large_diff(self): + self._check_replication_local_sharding_remote_unsharded( + {'per_diff': 1}) + + def _check_replication_local_sharding_remote_sharding(self, repl_conf): + local_context = self._setup_replication_test(0) + self._merge_object(index=slice(0, 5), **local_context) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(5, 10), **local_context) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=12, **remote_context) + # take snapshot of info now before transition to sharding... + orig_remote_info = remote_context['broker'].get_info() + remote_broker = remote_context['broker'] + self._goto_sharding_state(remote_broker, epoch) + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=13, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True)) + + # in sharding state brokers only reports object stats from old db, and + # they are different + self.assert_info_synced( + local_broker, 1, mismatches=['object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([remote_broker._db_file, shard_db], + get_db_files(remote_broker.db_file)) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([remote_broker._db_file, shard_db], + get_db_files(remote_broker.db_file)) + # no local objects have been sync'd to remote shard db + self.assertEqual(remote_context['objects'][13:14], + remote_broker.get_objects()) + # remote *old db* is unchanged + remote_old_broker = self.backend( + remote_broker._db_file, account=remote_broker.account, + container=remote_broker.container, force_db_file=True) + self.assertEqual(remote_context['objects'][12:13], + remote_old_broker.get_objects()) + self.assertFalse(remote_old_broker.get_shard_ranges()) + remote_old_info = remote_old_broker.get_info() + orig_remote_info.pop('db_state') + remote_old_info.pop('db_state') + self.assertEqual(orig_remote_info, remote_old_info) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + def test_replication_local_sharding_remote_sharding(self): + self._check_replication_local_sharding_remote_sharding({}) + + def test_replication_local_sharding_remote_sharding_large_diff(self): + self._check_replication_local_sharding_remote_sharding({'per_diff': 1}) + + def test_replication_local_sharded_remote_missing(self): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 3), **local_context) + self._goto_sharded_state(local_broker) + objs = 
local_context['objects'] + + daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1) + + self.assertEqual(['sync', 'complete_rsync'], + [call[0] for call in repl_calls]) + self.assertEqual(1, daemon.stats['rsync']) + + # sync + local_id = local_broker.get_info()['id'] + self.assertEqual(local_id, repl_calls[0][1][2]) + # complete_rsync + self.assertEqual(local_id, repl_calls[1][1][0]) + self.assertEqual( + os.path.basename(local_broker.db_file), repl_calls[1][1][1]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_id, os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + self.assert_info_synced(local_broker, 1) + + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_id = remote_broker.get_info()['id'] + self.assertNotEqual(local_id, remote_id) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([shard_db], + get_db_files(remote_broker.db_file)) + self.assertEqual(objs[:3], remote_broker.get_objects()) + self.assertEqual(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + # sanity check - in sync + self._assert_local_sharded_in_sync(local_broker, local_id) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([shard_db], + get_db_files(remote_broker.db_file)) + # the remote broker object_count comes from replicated shard range... + self.assertEqual(99, remote_broker.get_info()['object_count']) + # these are replicated misplaced objects... + self.assertEqual(objs[:3], remote_broker.get_objects()) + self.assertEqual(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + def _check_replication_local_sharded_remote_unsharded(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 3), **local_context) + self._goto_sharded_state(local_broker) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=4, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True), + expect_success=True) + + # sharded broker takes object count from shard range whereas remote + # unsharded broker takes it from object table + self.assert_info_synced( + local_broker, 1, + mismatches=['db_state', 'object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual([remote_broker._db_file], + get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][4:5], + remote_broker.get_objects()) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True), + expect_success=True) + + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual([remote_broker._db_file], + get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][4:5], + remote_broker.get_objects()) + + def test_replication_local_sharded_remote_unsharded(self): + self._check_replication_local_sharded_remote_unsharded({}) + + def test_replication_local_sharded_remote_unsharded_large_diff(self): + 
self._check_replication_local_sharded_remote_unsharded({'per_diff': 1}) + + def _check_replication_local_sharded_remote_sharding(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch=epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].bytes_used = 999 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 5), **local_context) + self._goto_sharded_state(local_broker) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=6, **remote_context) + remote_broker = remote_context['broker'] + remote_info_orig = remote_broker.get_info() + self._goto_sharding_state(remote_broker, epoch=epoch) + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=7, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + # remote has newer timestamp for shard range + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + # sharded broker takes object count from shard range whereas remote + # sharding broker takes it from object table + self.assert_info_synced( + local_broker, 1, + mismatches=['db_state', 'object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([remote_broker._db_file, shard_db], + get_db_files(remote_broker.db_file)) + # remote fresh db objects are unchanged + self.assertEqual(remote_context['objects'][7:8], + remote_broker.get_objects()) + # remote old hash.db objects are unchanged + remote_old_broker = self.backend( + remote_broker._db_file, account=remote_broker.account, + container=remote_broker.container, force_db_file=True) + self.assertEqual( + remote_context['objects'][6:7], + remote_old_broker.get_objects()) + remote_info = remote_old_broker.get_info() + remote_info_orig.pop('db_state') + remote_info.pop('db_state') + self.assertEqual(remote_info_orig, remote_info) + self.assertEqual(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + def test_replication_local_sharded_remote_sharding(self): + self._check_replication_local_sharded_remote_sharding({}) + + def test_replication_local_sharded_remote_sharding_large_diff(self): + self._check_replication_local_sharded_remote_sharding({'per_diff': 1}) + + def _check_replication_local_sharded_remote_sharded(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].bytes_used = 999 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 6), **local_context) + self._goto_sharded_state(local_broker) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=6, **remote_context) + remote_broker = remote_context['broker'] + self._goto_sharding_state(remote_broker, epoch) + remote_context['shard_ranges'][0].object_count = 101 + 
remote_context['shard_ranges'][0].bytes_used = 1010 + remote_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=7, **remote_context) + self._goto_sharded_state(remote_broker) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + # remote has newer timestamp for shard range + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + self.assert_info_synced( + local_broker, 1, + mismatches=['status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([shard_db], + get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][7:8], + remote_broker.get_objects()) + # remote shard range was newer than local so object count is not + # updated by sync'd shard range + self.assertEqual( + 101, remote_broker.get_shard_ranges()[0].object_count) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + # remote has newer timestamp for shard range + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + def test_replication_local_sharded_remote_sharded(self): + self._check_replication_local_sharded_remote_sharded({}) + + def test_replication_local_sharded_remote_sharded_large_diff(self): + self._check_replication_local_sharded_remote_sharded({'per_diff': 1}) + + def test_replication_rsync_then_merge_aborts_before_merge_sharding(self): + # verify that rsync_then_merge aborts if remote starts sharding during + # the rsync + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_func = replicator.ContainerReplicatorRpc.rsync_then_merge + + def mock_rsync_then_merge(*args): + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + return orig_func(*args) + + with mock.patch( + 'swift.container.replicator.ContainerReplicatorRpc.' + 'rsync_then_merge', + mock_rsync_then_merge): + with mock.patch( + 'swift.container.backend.ContainerBroker.' 
+ 'get_items_since') as mock_get_items_since: + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + mock_get_items_since.assert_not_called() + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + def test_replication_rsync_then_merge_aborts_before_merge_sharded(self): + # verify that rsync_then_merge aborts if remote completes sharding + # during the rsync + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_func = replicator.ContainerReplicatorRpc.rsync_then_merge + + def mock_rsync_then_merge(*args): + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + self._goto_sharded_state(remote_broker) + return orig_func(*args) + + with mock.patch( + 'swift.container.replicator.ContainerReplicatorRpc.' + 'rsync_then_merge', + mock_rsync_then_merge): + with mock.patch( + 'swift.container.backend.ContainerBroker.' + 'get_items_since') as mock_get_items_since: + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + mock_get_items_since.assert_not_called() + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + def test_replication_rsync_then_merge_aborts_after_merge_sharding(self): + # verify that rsync_then_merge aborts if remote starts sharding during + # the merge + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_get_items_since = backend.ContainerBroker.get_items_since + calls = [] + + def fake_get_items_since(broker, *args): + # remote starts sharding while rpc call is merging + if not calls: + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + calls.append(args) + return orig_get_items_since(broker, *args) + + with mock.patch( + 'swift.container.backend.ContainerBroker.get_items_since', + fake_get_items_since): + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + def test_replication_rsync_then_merge_aborts_after_merge_sharded(self): + # verify that rsync_then_merge 
aborts if remote completes sharding + # during the merge + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_get_items_since = backend.ContainerBroker.get_items_since + calls = [] + + def fake_get_items_since(broker, *args): + # remote starts sharding while rpc call is merging + result = orig_get_items_since(broker, *args) + if calls: + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + self._goto_sharded_state(remote_broker) + calls.append(args) + return result + + with mock.patch( + 'swift.container.backend.ContainerBroker.get_items_since', + fake_get_items_since): + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + if __name__ == '__main__': unittest.main() diff --git a/test/unit/container/test_server.py b/test/unit/container/test_server.py index 54ce6d973b..916f0e146d 100644 --- a/test/unit/container/test_server.py +++ b/test/unit/container/test_server.py @@ -22,6 +22,7 @@ import itertools from contextlib import contextmanager from shutil import rmtree from tempfile import mkdtemp +from test.unit import make_timestamp_iter, mock_timestamp_now from time import gmtime from xml.dom import minidom import time @@ -40,12 +41,13 @@ import swift.container from swift.container import server as container_server from swift.common import constraints from swift.common.utils import (Timestamp, mkdirs, public, replication, - storage_directory, lock_parent_directory) + storage_directory, lock_parent_directory, + ShardRange) from test.unit import fake_http_connect, debug_logger, mock_check_drive from swift.common.storage_policy import (POLICIES, StoragePolicy) from swift.common.request_helpers import get_sys_meta_prefix -from test import listen_zero +from test import listen_zero, annotate_failure from test.unit import patch_policies @@ -86,6 +88,16 @@ class TestContainerController(unittest.TestCase): """ pass + def _put_shard_range(self, shard_range): + put_timestamp = shard_range.timestamp.internal + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': put_timestamp} + body = json.dumps([dict(shard_range)]) + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=body) + resp = req.get_response(self.controller) + self.assertIn(resp.status_int, (201, 202)) + def _check_put_container_storage_policy(self, req, policy_index): resp = req.get_response(self.controller) self.assertEqual(201, resp.status_int) @@ -95,6 +107,11 @@ class TestContainerController(unittest.TestCase): self.assertEqual(str(policy_index), resp.headers['X-Backend-Storage-Policy-Index']) + def _assert_shard_ranges_equal(self, x, y): + # ShardRange.__eq__ only compares lower and upper; here we generate + # dict representations to compare all attributes + self.assertEqual([dict(sr) for sr in x], [dict(sr) for sr in y]) + def test_creation(self): # later config should be extended 
to assert more config options replicator = container_server.ContainerController( @@ -424,7 +441,7 @@ class TestContainerController(unittest.TestCase): elif state[0] == 'race': # Save the original db_file attribute value self._saved_db_file = self.db_file - self.db_file += '.doesnotexist' + self._db_file += '.doesnotexist' def initialize(self, *args, **kwargs): if state[0] == 'initial': @@ -433,7 +450,7 @@ class TestContainerController(unittest.TestCase): elif state[0] == 'race': # Restore the original db_file attribute to get the race # behavior - self.db_file = self._saved_db_file + self._db_file = self._saved_db_file return super(InterceptedCoBr, self).initialize(*args, **kwargs) with mock.patch("swift.container.server.ContainerBroker", @@ -1372,21 +1389,100 @@ class TestContainerController(unittest.TestCase): self.assertEqual(resp.status_int, 500) def test_DELETE(self): + ts_iter = make_timestamp_iter() req = Request.blank( '/sda1/p/a/c', - environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '1'}) + environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': next(ts_iter).internal}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 201) + + # PUT an *empty* shard range + sr = ShardRange('.shards_a/c', next(ts_iter), 'l', 'u', 0, 0, + state=ShardRange.ACTIVE) req = Request.blank( '/sda1/p/a/c', - environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '2'}) + environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard'}, + body=json.dumps([dict(sr)])) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 202) + + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'X-Timestamp': next(ts_iter).internal}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 204) + req = Request.blank( '/sda1/p/a/c', - environ={'REQUEST_METHOD': 'GET'}, headers={'X-Timestamp': '3'}) + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 404) + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + + # the override-deleted header is ignored for object records + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Override-Deleted': 'true'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + + # but override-deleted header makes shard ranges available after DELETE + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'true'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual([dict(sr, last_modified=sr.timestamp.isoformat)], + json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + # ... 
unless the override header equates to False + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'no'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + self.assertNotIn('X-Backend-Record-Type', resp.headers) + + # ...or the db file is unlinked + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + self.assertTrue(os.path.exists(broker.db_file)) + os.unlink(broker.db_file) + self.assertFalse(os.path.exists(broker.db_file)) + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'true'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + self.assertNotIn('X-Backend-Record-Type', resp.headers) def test_DELETE_PUT_recreate(self): path = '/sda1/p/a/c' @@ -1460,7 +1556,7 @@ class TestContainerController(unittest.TestCase): self.assertEqual(True, db.is_deleted()) # now save a copy of this db (and remove it from the "current node") db = self.controller._get_container_broker('sda1', 'p', 'a', 'c') - db_path = db.db_file + db_path = db._db_file other_path = os.path.join(self.testdir, 'othernode.db') os.rename(db_path, other_path) # that should make it missing on this node @@ -1474,6 +1570,8 @@ class TestContainerController(unittest.TestCase): def mock_exists(db_path): rv = _real_exists(db_path) + if db_path != db._db_file: + return rv if not mock_called: # be as careful as we might hope backend replication can be... with lock_parent_directory(db_path, timeout=1): @@ -2040,6 +2138,1140 @@ class TestContainerController(unittest.TestCase): resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 412) + def test_PUT_shard_range_autocreates_shard_container(self): + ts_iter = make_timestamp_iter() + shard_range = ShardRange('.shards_a/shard_c', next(ts_iter)) + put_timestamp = next(ts_iter).internal + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': put_timestamp, + 'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + + # PUT shard range to non-existent container with non-autocreate prefix + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # PUT shard range to non-existent container with autocreate prefix, + # missing storage policy + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(400, resp.status_int) + self.assertIn('X-Backend-Storage-Policy-Index header is required', + resp.body) + + # PUT shard range to non-existent container with autocreate prefix + headers['X-Timestamp'] = next(ts_iter).internal + policy_index = random.choice(POLICIES).idx + headers['X-Backend-Storage-Policy-Index'] = str(policy_index) + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + # repeat PUT of shard range to autocreated container - 204 response + 
headers['X-Timestamp'] = next(ts_iter).internal + headers.pop('X-Backend-Storage-Policy-Index') # no longer required + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + + # regular PUT to autocreated container - 204 response + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', + headers={'X-Timestamp': next(ts_iter).internal}, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + + def test_PUT_shard_range_to_deleted_container(self): + ts_iter = make_timestamp_iter() + put_time = next(ts_iter).internal + # create a container, get it to sharded state and then delete it + req = Request.blank('/sda1/p/a/c', method='PUT', + headers={'X-Timestamp': put_time}) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + + delete_time = next(ts_iter).internal + req = Request.blank('/sda1/p/a/c', method='DELETE', + headers={'X-Timestamp': delete_time}) + resp = req.get_response(self.controller) + self.assertEqual(204, resp.status_int) + self.assertTrue(broker.is_deleted()) + self.assertEqual(delete_time, broker.get_info()['delete_timestamp']) + self.assertEqual(put_time, broker.get_info()['put_timestamp']) + req = Request.blank('/sda1/p/a/c', method='GET') + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # shard range PUT is accepted but container remains deleted + shard_range = ShardRange('.shards_a/shard_c', next(ts_iter), + state=ShardRange.ACTIVE) + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': next(ts_iter).internal, + 'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertTrue(broker.get_info_is_deleted()[1]) + self.assertEqual(delete_time, broker.get_info()['delete_timestamp']) + self.assertEqual(put_time, broker.get_info()['put_timestamp']) + req = Request.blank('/sda1/p/a/c', method='GET') + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # unless shard range has non-zero stats, then container is revived + shard_range.update_meta(99, 1234, meta_timestamp=next(ts_iter)) + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertFalse(broker.get_info_is_deleted()[1]) + self.assertEqual(delete_time, broker.get_info()['delete_timestamp']) + self.assertEqual(put_time, broker.get_info()['put_timestamp']) + req = Request.blank('/sda1/p/a/c', method='GET') + resp = req.get_response(self.controller) + self.assertEqual(204, resp.status_int) + self.assertEqual('99', resp.headers['X-Container-Object-Count']) + + def test_PUT_shard_range_json_in_body(self): + ts_iter = make_timestamp_iter() + oldest_ts = next(ts_iter) # used for stale shard range PUT later + shard_bounds = [('', 'ham', ShardRange.ACTIVE), + ('ham', 'salami', 
ShardRange.ACTIVE), + ('salami', '', ShardRange.CREATED)] + shard_ranges = [ + ShardRange('.shards_a/_%s' % upper, next(ts_iter), + lower, upper, + i * 100, i * 1000, meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter)) + for i, (lower, upper, state) in enumerate(shard_bounds)] + + put_timestamp = next(ts_iter).internal + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': put_timestamp, + 'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + body = json.dumps([dict(sr) for sr in shard_ranges[:2]]) + + # PUT some shard ranges to non-existent container + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=body) + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # create the container with a regular PUT + req = Request.blank( + '/sda1/p/a/c', method='PUT', + headers={'X-Timestamp': put_timestamp}, body=body) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + # now we can PUT shard ranges + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + + # check broker + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + # sysmeta and user meta is updated + exp_meta = {'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + self._assert_shard_ranges_equal(shard_ranges[:2], + broker.get_shard_ranges()) + + # empty json dict + body = json.dumps({}) + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal(shard_ranges[:2], + broker.get_shard_ranges()) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + older_ts = next(ts_iter) # used for stale shard range PUT later + # updated and new shard ranges + shard_ranges[1].bytes_used += 100 + shard_ranges[1].meta_timestamp = next(ts_iter) + body = json.dumps([dict(sr) for sr in shard_ranges[1:]]) + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + # stale shard range + stale_shard_range = shard_ranges[1].copy() + stale_shard_range.bytes_used = 0 + stale_shard_range.object_count = 0 + stale_shard_range.meta_timestamp = older_ts + stale_shard_range.state = ShardRange.CREATED + stale_shard_range.state_timestamp = oldest_ts + body = json.dumps([dict(stale_shard_range)]) + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal(shard_ranges, + 
broker.get_shard_ranges()) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + # deleted shard range + shard_ranges[0].deleted = 1 + shard_ranges[0].timestamp = next(ts_iter) + body = json.dumps([dict(shard_ranges[0])]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal( + shard_ranges, broker.get_shard_ranges(include_deleted=True)) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + def check_bad_body(body): + bad_put_timestamp = next(ts_iter).internal + headers['X-Timestamp'] = bad_put_timestamp + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(400, resp.status_int) + self.assertIn('Invalid body', resp.body) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal( + shard_ranges, broker.get_shard_ranges(include_deleted=True)) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + check_bad_body('not json') + check_bad_body('') + check_bad_body('["not a shard range"]') + check_bad_body('[[]]') + bad_shard_range = dict(ShardRange('a/c', next(ts_iter))) + bad_shard_range.pop('timestamp') + check_bad_body(json.dumps([bad_shard_range])) + + def check_not_shard_record_type(headers): + # body ignored + body = json.dumps([dict(sr) for sr in shard_ranges]) + # note, regular PUT so put timestamp is updated + put_timestamp = next(ts_iter).internal + headers['X-Timestamp'] = put_timestamp + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self._assert_shard_ranges_equal( + shard_ranges, broker.get_shard_ranges(include_deleted=True)) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + check_not_shard_record_type({'X-Backend-Record-Type': 'object', + 'X-Timestamp': next(ts_iter).internal}) + + check_not_shard_record_type({'X-Timestamp': next(ts_iter).internal}) + + def test_PUT_GET_shard_ranges(self): + # make a container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + # PUT some objects + objects = [{'name': 'obj_%d' % i, + 'x-timestamp': next(ts_iter).normal, + 'x-content-type': 'text/plain', + 'x-etag': 'etag_%d' % i, + 'x-size': 1024 * i + } for i in range(2)] + for obj in objects: + req = Request.blank('/sda1/p/a/c/%s' % obj['name'], method='PUT', + headers=obj) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + # PUT some shard ranges + shard_bounds = [('', 'apple', ShardRange.SHRINKING), + ('apple', 'ham', ShardRange.CLEAVED), + ('ham', 'salami', ShardRange.ACTIVE), + ('salami', 'yoghurt', ShardRange.CREATED), + ('yoghurt', '', ShardRange.FOUND), + ] + shard_ranges = [ + ShardRange('.sharded_a/_%s' % upper, next(ts_iter), + lower, upper, + i * 100, i * 1000, meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter)) + for i, (lower, upper, state) in enumerate(shard_bounds)] + for 
shard_range in shard_ranges: + self._put_shard_range(shard_range) + + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + self.assertTrue(broker.is_root_container()) # sanity + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + + # sanity check - no shard ranges when GET is only for objects + def check_object_GET(path): + req = Request.blank(path, method='GET') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(hash=obj['x-etag'], bytes=obj['x-size'], + content_type=obj['x-content-type'], + last_modified=Timestamp(obj['x-timestamp']).isoformat, + name=obj['name']) for obj in objects] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('object', resp.headers['X-Backend-Record-Type']) + + check_object_GET('/sda1/p/a/c?format=json') + + # GET only shard ranges + def check_shard_GET(expected_shard_ranges, path, params=''): + req = Request.blank('/sda1/p/%s?format=json%s' % + (path, params), method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(sr, last_modified=Timestamp(sr.timestamp).isoformat) + for sr in expected_shard_ranges] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + # all shards + check_shard_GET(shard_ranges, 'a/c') + check_shard_GET(reversed(shard_ranges), 'a/c', params='&reverse=true') + # only created shards + check_shard_GET(shard_ranges[3:4], 'a/c', params='&states=created') + # only found shards + check_shard_GET(shard_ranges[4:5], 'a/c', params='&states=found') + # only cleaved shards + check_shard_GET(shard_ranges[1:2], 'a/c', + params='&states=cleaved') + # only active shards + check_shard_GET(shard_ranges[2:3], 'a/c', + params='&states=active&end_marker=pickle') + # only cleaved or active shards, reversed + check_shard_GET( + reversed(shard_ranges[1:3]), 'a/c', + params='&states=cleaved,active&reverse=true&marker=pickle') + # only shrinking shards + check_shard_GET(shard_ranges[:1], 'a/c', + params='&states=shrinking&end_marker=pickle') + check_shard_GET(shard_ranges[:1], 'a/c', + params='&states=shrinking&reverse=true&marker=pickle') + # only active or shrinking shards + check_shard_GET([shard_ranges[0], shard_ranges[2]], 'a/c', + params='&states=shrinking,active&end_marker=pickle') + check_shard_GET( + [shard_ranges[2], shard_ranges[0]], 'a/c', + params='&states=active,shrinking&reverse=true&marker=pickle') + # only active or shrinking shards using listing alias + check_shard_GET(shard_ranges[:3], 'a/c', + params='&states=listing&end_marker=pickle') + check_shard_GET( + reversed(shard_ranges[:3]), 'a/c', + params='&states=listing&reverse=true&marker=pickle') + # only created, cleaved, active, shrinking shards using updating alias + check_shard_GET(shard_ranges[1:4], 'a/c', + params='&states=updating&end_marker=treacle') + check_shard_GET( + reversed(shard_ranges[1:4]), 'a/c', + params='&states=updating&reverse=true&marker=treacle') + + # listing shards don't cover entire namespace so expect an extra filler + extra_shard_range = ShardRange( + 'a/c', ts_now, shard_ranges[2].upper, ShardRange.MAX, 2, 1024, + 
state=ShardRange.ACTIVE) + expected = shard_ranges[:3] + [extra_shard_range] + check_shard_GET(expected, 'a/c', params='&states=listing') + check_shard_GET(reversed(expected), 'a/c', + params='&states=listing&reverse=true') + expected = [shard_ranges[2], extra_shard_range] + check_shard_GET(expected, 'a/c', + params='&states=listing&marker=pickle') + check_shard_GET( + reversed(expected), 'a/c', + params='&states=listing&reverse=true&end_marker=pickle') + # updating shards don't cover entire namespace so expect a filler + extra_shard_range = ShardRange( + 'a/c', ts_now, shard_ranges[3].upper, ShardRange.MAX, 2, 1024, + state=ShardRange.ACTIVE) + expected = shard_ranges[1:4] + [extra_shard_range] + check_shard_GET(expected, 'a/c', params='&states=updating') + check_shard_GET(reversed(expected), 'a/c', + params='&states=updating&reverse=true') + # when no listing shard ranges cover the requested namespace range then + # filler is for entire requested namespace + extra_shard_range = ShardRange( + 'a/c', ts_now, 'treacle', ShardRange.MAX, 2, 1024, + state=ShardRange.ACTIVE) + check_shard_GET([extra_shard_range], 'a/c', + params='&states=listing&marker=treacle') + check_shard_GET( + [extra_shard_range], 'a/c', + params='&states=listing&reverse=true&end_marker=treacle') + extra_shard_range = ShardRange( + 'a/c', ts_now, 'treacle', 'walnut', 2, 1024, + state=ShardRange.ACTIVE) + params = '&states=listing&marker=treacle&end_marker=walnut' + check_shard_GET([extra_shard_range], 'a/c', params=params) + params = '&states=listing&reverse=true&marker=walnut' + \ + '&end_marker=treacle' + check_shard_GET([extra_shard_range], 'a/c', params=params) + # specific object + check_shard_GET(shard_ranges[1:2], 'a/c', params='&includes=cheese') + check_shard_GET(shard_ranges[1:2], 'a/c', params='&includes=ham') + check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=pickle') + check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=salami') + check_shard_GET(shard_ranges[3:4], 'a/c', params='&includes=walnut') + check_shard_GET(shard_ranges[3:4], 'a/c', + params='&includes=walnut&reverse=true') + # with marker + check_shard_GET(shard_ranges[1:], 'a/c', params='&marker=cheese') + check_shard_GET(reversed(shard_ranges[:2]), 'a/c', + params='&marker=cheese&reverse=true') + check_shard_GET(shard_ranges[2:], 'a/c', params='&marker=ham') + check_shard_GET(reversed(shard_ranges[:2]), 'a/c', + params='&marker=ham&reverse=true') + check_shard_GET(shard_ranges[2:], 'a/c', params='&marker=pickle') + check_shard_GET(reversed(shard_ranges[:3]), 'a/c', + params='&marker=pickle&reverse=true') + check_shard_GET(shard_ranges[3:], 'a/c', params='&marker=salami') + check_shard_GET(reversed(shard_ranges[:3]), 'a/c', + params='&marker=salami&reverse=true') + check_shard_GET(shard_ranges[3:], 'a/c', params='&marker=walnut') + check_shard_GET(reversed(shard_ranges[:4]), 'a/c', + params='&marker=walnut&reverse=true') + # with end marker + check_shard_GET(shard_ranges[:2], 'a/c', params='&end_marker=cheese') + check_shard_GET(reversed(shard_ranges[1:]), 'a/c', + params='&end_marker=cheese&reverse=true') + # everything in range 'apple' - 'ham' is <= end_marker of 'ham' so that + # range is not included because end_marker is non-inclusive + check_shard_GET(shard_ranges[:2], 'a/c', params='&end_marker=ham') + check_shard_GET(reversed(shard_ranges[2:]), 'a/c', + params='&end_marker=ham&reverse=true') + check_shard_GET(shard_ranges[:3], 'a/c', params='&end_marker=pickle') + check_shard_GET(reversed(shard_ranges[2:]), 'a/c', + 
params='&end_marker=pickle&reverse=true') + check_shard_GET(shard_ranges[:3], 'a/c', params='&end_marker=salami') + check_shard_GET(reversed(shard_ranges[3:]), 'a/c', + params='&end_marker=salami&reverse=true') + check_shard_GET(shard_ranges[:4], 'a/c', params='&end_marker=walnut') + check_shard_GET(reversed(shard_ranges[3:]), 'a/c', + params='&end_marker=walnut&reverse=true') + # with marker and end marker + check_shard_GET(shard_ranges[1:2], 'a/c', + params='&marker=cheese&end_marker=egg') + check_shard_GET(shard_ranges[1:2], 'a/c', + params='&end_marker=cheese&marker=egg&reverse=true') + check_shard_GET(shard_ranges[1:3], 'a/c', + params='&marker=egg&end_marker=jam') + check_shard_GET(reversed(shard_ranges[1:3]), 'a/c', + params='&end_marker=egg&marker=jam&reverse=true') + check_shard_GET(shard_ranges[1:4], 'a/c', + params='&marker=cheese&end_marker=walnut') + check_shard_GET(reversed(shard_ranges[1:4]), 'a/c', + params='&end_marker=cheese&marker=walnut&reverse=true') + check_shard_GET(shard_ranges[2:4], 'a/c', + params='&marker=jam&end_marker=walnut') + check_shard_GET(reversed(shard_ranges[2:4]), 'a/c', + params='&end_marker=jam&marker=walnut&reverse=true') + check_shard_GET(shard_ranges[3:4], 'a/c', + params='&marker=toast&end_marker=walnut') + check_shard_GET(shard_ranges[3:4], 'a/c', + params='&end_marker=toast&marker=walnut&reverse=true') + check_shard_GET([], 'a/c', + params='&marker=egg&end_marker=cheese') + check_shard_GET([], 'a/c', + params='&marker=cheese&end_marker=egg&reverse=true') + + # delete a shard range + shard_range = shard_ranges[1] + shard_range.set_deleted(timestamp=next(ts_iter)) + self._put_shard_range(shard_range) + + self._assert_shard_ranges_equal(shard_ranges[:1] + shard_ranges[2:], + broker.get_shard_ranges()) + + check_shard_GET(shard_ranges[:1] + shard_ranges[2:], 'a/c') + check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=jam') + # specify obj, marker or end_marker not in any shard range + check_shard_GET([], 'a/c', params='&includes=cheese') + check_shard_GET([], 'a/c', params='&includes=cheese&reverse=true') + check_shard_GET([], 'a/c', params='&includes=ham') + check_shard_GET(shard_ranges[2:], 'a/c/', params='&marker=cheese') + check_shard_GET(shard_ranges[:1], 'a/c/', + params='&marker=cheese&reverse=true') + check_shard_GET(shard_ranges[:1], 'a/c/', params='&end_marker=cheese') + check_shard_GET(reversed(shard_ranges[2:]), 'a/c/', + params='&end_marker=cheese&reverse=true') + + self.assertFalse(self.controller.logger.get_lines_for_level('warning')) + self.assertFalse(self.controller.logger.get_lines_for_level('error')) + + def test_GET_shard_ranges_using_state_aliases(self): + # make a shard container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + shard_ranges = [] + lower = '' + for state in sorted(ShardRange.STATES.keys()): + upper = str(state) + shard_ranges.append( + ShardRange('.shards_a/c_%s' % upper, next(ts_iter), + lower, upper, state * 100, state * 1000, + meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter))) + lower = upper + + def do_test(root_path, path, params, expected_states): + expected = [ + sr for sr in shard_ranges if sr.state in expected_states] + own_shard_range = ShardRange(path, next(ts_iter), '', '', + state=ShardRange.ACTIVE) + expected.append(own_shard_range.copy( + lower=expected[-1].upper, meta_timestamp=ts_now)) + expected = [dict(sr, last_modified=sr.timestamp.isoformat) + for sr in expected] + headers = {'X-Timestamp': 
next(ts_iter).normal} + + # create container + req = Request.blank( + '/sda1/p/%s' % path, method='PUT', headers=headers) + self.assertIn( + req.get_response(self.controller).status_int, (201, 202)) + # PUT some shard ranges + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Container-Sysmeta-Shard-Root': root_path, + 'X-Backend-Record-Type': 'shard'} + body = json.dumps( + [dict(sr) for sr in shard_ranges + [own_shard_range]]) + req = Request.blank( + '/sda1/p/%s' % path, method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + + req = Request.blank('/sda1/p/%s?format=json%s' % + (path, params), method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + # root's shard ranges for listing + root_path = container_path = 'a/c' + params = '&states=listing' + expected_states = [ + ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING] + do_test(root_path, container_path, params, expected_states) + + # shard's shard ranges for listing + container_path = '.shards_a/c' + params = '&states=listing' + do_test(root_path, container_path, params, expected_states) + + # root's shard ranges for updating + params = '&states=updating' + expected_states = [ + ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING] + container_path = root_path + do_test(root_path, container_path, params, expected_states) + + # shard's shard ranges for updating + container_path = '.shards_a/c' + do_test(root_path, container_path, params, expected_states) + + def test_GET_shard_ranges_include_deleted(self): + # make a shard container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + shard_ranges = [] + lower = '' + for state in sorted(ShardRange.STATES.keys()): + upper = str(state) + shard_ranges.append( + ShardRange('.shards_a/c_%s' % upper, next(ts_iter), + lower, upper, state * 100, state * 1000, + meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter))) + lower = upper + # create container + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers) + self.assertIn( + req.get_response(self.controller).status_int, (201, 202)) + # PUT some shard ranges + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + + def do_test(include_deleted, expected): + expected = [dict(sr, last_modified=sr.timestamp.isoformat) + for sr in expected] + headers = {'X-Backend-Record-Type': 'shard', + 'X-Backend-Include-Deleted': str(include_deleted)} + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers=headers) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + 
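# A minimal illustrative sketch, assuming a ContainerController instance
# named `controller` and the same '/sda1/p/a/c' fixture used by these
# tests: composing a backend GET that returns shard records rather than
# object records, optionally including deleted shard ranges.
def get_backend_shard_ranges(controller, include_deleted=False):
    req = Request.blank(
        '/sda1/p/a/c?format=json', method='GET',
        headers={'X-Backend-Record-Type': 'shard',
                 'X-Backend-Include-Deleted': str(include_deleted)})
    resp = req.get_response(controller)
    # the response body is a JSON list of shard range dicts, matching the
    # expectations asserted in the surrounding test
    return json.loads(resp.body)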
self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + do_test(False, shard_ranges) + do_test(True, shard_ranges) + + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + for sr in shard_ranges[::2]: + sr.set_deleted(timestamp=next(ts_iter)) + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + self._assert_shard_ranges_equal( + shard_ranges[1::2], broker.get_shard_ranges()) + do_test(False, shard_ranges[1::2]) + do_test(True, shard_ranges) + + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + for sr in shard_ranges[1::2]: + sr.set_deleted(timestamp=next(ts_iter)) + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + self.assertFalse(broker.get_shard_ranges()) + do_test(False, []) + do_test(True, shard_ranges) + + def test_GET_shard_ranges_errors(self): + # verify that x-backend-record-type is not included in error responses + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + shard_ranges = [] + lower = '' + for state in sorted(ShardRange.STATES.keys()): + upper = str(state) + shard_ranges.append( + ShardRange('.shards_a/c_%s' % upper, next(ts_iter), + lower, upper, state * 100, state * 1000, + meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter))) + lower = upper + # create container + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers) + self.assertIn( + req.get_response(self.controller).status_int, (201, 202)) + # PUT some shard ranges + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + + def do_test(params): + params['format'] = 'json' + headers = {'X-Backend-Record-Type': 'shard'} + req = Request.blank('/sda1/p/a/c', method='GET', + headers=headers, params=params) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.content_type, 'text/html') + self.assertNotIn('X-Backend-Record-Type', resp.headers) + self.assertNotIn('X-Backend-Sharding-State', resp.headers) + self.assertNotIn('X-Container-Object-Count', resp.headers) + self.assertNotIn('X-Container-Bytes-Used', resp.headers) + self.assertNotIn('X-Timestamp', resp.headers) + self.assertNotIn('X-PUT-Timestamp', resp.headers) + return resp + + resp = do_test({'states': 'bad'}) + self.assertEqual(resp.status_int, 400) + resp = do_test({'delimiter': 'bad'}) + self.assertEqual(resp.status_int, 412) + resp = do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)}) + self.assertEqual(resp.status_int, 412) + with mock.patch('swift.container.server.check_drive', + lambda *args: False): + resp = do_test({}) + self.assertEqual(resp.status_int, 507) + + # delete the container + req = Request.blank('/sda1/p/a/c', method='DELETE', + headers={'X-Timestamp': next(ts_iter).normal}) + self.assertEqual(204, req.get_response(self.controller).status_int) + + resp = 
do_test({'states': 'bad'}) + self.assertEqual(resp.status_int, 404) + + def test_GET_auto_record_type(self): + # make a container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + # PUT some objects + objects = [{'name': 'obj_%d' % i, + 'x-timestamp': next(ts_iter).normal, + 'x-content-type': 'text/plain', + 'x-etag': 'etag_%d' % i, + 'x-size': 1024 * i + } for i in range(2)] + for obj in objects: + req = Request.blank('/sda1/p/a/c/%s' % obj['name'], method='PUT', + headers=obj) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + # PUT some shard ranges + shard_bounds = [('', 'm', ShardRange.CLEAVED), + ('m', '', ShardRange.CREATED)] + shard_ranges = [ + ShardRange('.sharded_a/_%s' % upper, next(ts_iter), + lower, upper, + i * 100, i * 1000, meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter)) + for i, (lower, upper, state) in enumerate(shard_bounds)] + for shard_range in shard_ranges: + self._put_shard_range(shard_range) + + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + + def assert_GET_objects(req, expected_objects): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(hash=obj['x-etag'], bytes=obj['x-size'], + content_type=obj['x-content-type'], + last_modified=Timestamp(obj['x-timestamp']).isoformat, + name=obj['name']) for obj in expected_objects] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual( + 'object', resp.headers.pop('X-Backend-Record-Type')) + resp.headers.pop('Content-Length') + return resp + + def assert_GET_shard_ranges(req, expected_shard_ranges): + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(sr, last_modified=Timestamp(sr.timestamp).isoformat) + for sr in expected_shard_ranges] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual( + 'shard', resp.headers.pop('X-Backend-Record-Type')) + resp.headers.pop('Content-Length') + return resp + + # unsharded + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_objects(req, objects) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET') + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + + # move to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_shard_ranges(req, 
shard_ranges) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET') + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + + # limit is applied to objects but not shard ranges + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, objects[:1]) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET') + resp = assert_GET_objects(req, objects[:1]) + self.assertEqual(headers, resp.headers) + + # move to sharded state + self.assertTrue(broker.set_sharded_state()) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, []) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET') + resp = assert_GET_objects(req, []) + self.assertEqual(headers, resp.headers) + + def test_PUT_GET_to_sharding_container(self): + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + ts_iter = make_timestamp_iter() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + + def do_update(name, timestamp=None, headers=None): + # Make a PUT request to container controller to update an object + timestamp = timestamp or next(ts_iter) + headers = headers or {} + headers.update({'X-Timestamp': timestamp.internal, + 'X-Size': 17, + 'X-Content-Type': 'text/plain', + 'X-Etag': 'fake etag'}) + req = Request.blank( + '/sda1/p/a/c/%s' % name, method='PUT', headers=headers) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + def get_api_listing(): + req = Request.blank( + '/sda1/p/a/c', method='GET', params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(200, resp.status_int) + return [obj['name'] for obj in json.loads(resp.body)] + + def assert_broker_rows(broker, expected_names, expected_max_row): + self.assertEqual(expected_max_row, broker.get_max_row()) + with broker.get() as conn: + curs = conn.execute(''' + SELECT * FROM object WHERE ROWID > -1 ORDER BY ROWID 
ASC + ''') + actual = [r[1] for r in curs] + + self.assertEqual(expected_names, actual) + + do_update('unsharded') + self.assertEqual(['unsharded'], get_api_listing()) + assert_broker_rows(broker, ['unsharded'], 1) + + # move container to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], [], 1) + + # add another update - should not merge into the older db and therefore + # not appear in api listing + do_update('sharding') + self.assertEqual(['unsharded'], get_api_listing()) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], ['sharding'], 2) + + orig_lister = swift.container.backend.ContainerBroker.list_objects_iter + + def mock_list_objects_iter(*args, **kwargs): + # cause an update to land in the pending file after it has been + # flushed by get_info() calls in the container PUT method, but + # before it is flushed by the call to list_objects_iter + do_update('racing_update') + return orig_lister(*args, **kwargs) + + with mock.patch( + 'swift.container.backend.ContainerBroker.list_objects_iter', + mock_list_objects_iter): + listing = get_api_listing() + + self.assertEqual(['unsharded'], listing) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], ['sharding'], 2) + + # next listing will flush pending file + listing = get_api_listing() + self.assertEqual(['unsharded'], listing) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], + ['sharding', 'racing_update'], 3) + + def _check_object_update_redirected_to_shard(self, method): + expected_status = 204 if method == 'DELETE' else 201 + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + ts_iter = make_timestamp_iter() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + + def do_update(name, timestamp=None, headers=None): + # Make a PUT request to container controller to update an object + timestamp = timestamp or next(ts_iter) + headers = headers or {} + headers.update({'X-Timestamp': timestamp.internal, + 'X-Size': 17, + 'X-Content-Type': 'text/plain', + 'X-Etag': 'fake etag'}) + req = Request.blank( + '/sda1/p/a/c/%s' % name, method=method, headers=headers) + self._update_object_put_headers(req) + return req.get_response(self.controller) + + def get_listing(broker_index): + # index -1 is always the freshest db + sub_broker = broker.get_brokers()[broker_index] + return sub_broker.get_objects() + + def assert_not_redirected(obj_name, timestamp=None, headers=None): + resp = do_update(obj_name, timestamp=timestamp, headers=headers) + self.assertEqual(expected_status, resp.status_int) + self.assertNotIn('Location', resp.headers) + self.assertNotIn('X-Backend-Redirect-Timestamp', resp.headers) + + def assert_redirected(obj_name, shard_range, headers=None): + resp = do_update(obj_name, headers=headers) + self.assertEqual(301, resp.status_int) + self.assertEqual('/%s/%s' % (shard_range.name, obj_name), + resp.headers['Location']) + self.assertEqual(shard_range.timestamp.internal, + resp.headers['X-Backend-Redirect-Timestamp']) + + # sanity check + ts_bashful_orig = next(ts_iter) + mocked_fn = 'swift.container.backend.ContainerBroker.get_shard_ranges' + with 
mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected('bashful', ts_bashful_orig) + mock_get_shard_ranges.assert_not_called() + + shard_ranges = { + 'dopey': ShardRange( + '.sharded_a/sr_dopey', next(ts_iter), '', 'dopey'), + 'happy': ShardRange( + '.sharded_a/sr_happy', next(ts_iter), 'dopey', 'happy'), + '': ShardRange('.sharded_a/sr_', next(ts_iter), 'happy', '') + } + # start with only the middle shard range + self._put_shard_range(shard_ranges['happy']) + + # db not yet sharding but shard ranges exist + sr_happy = shard_ranges['happy'] + redirect_states = ( + ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING) + headers = {'X-Backend-Accept-Redirect': 'true'} + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'grumpy%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + else: + assert_not_redirected(obj_name, headers=headers) + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + obj_name = 'grumpy%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + + # set broker to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'grumpier%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + else: + assert_not_redirected(obj_name, headers=headers) + # update goes to fresh db, misplaced + self.assertIn( + obj_name, [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn( + obj_name, [obj['name'] for obj in get_listing(0)]) + obj_name = 'grumpier%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn( + obj_name, [obj['name'] for obj in get_listing(-1)]) + # update is misplaced, not in retiring db + self.assertNotIn( + obj_name, [obj['name'] for obj in get_listing(0)]) + + # no shard for this object yet so it is accepted by root container + # and stored in misplaced objects... 
+ assert_not_redirected('dopey', timestamp=next(ts_iter)) + self.assertIn('dopey', [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn('dopey', [obj['name'] for obj in get_listing(0)]) + + # now PUT the first shard range + sr_dopey = shard_ranges['dopey'] + sr_dopey.update_state(ShardRange.CLEAVED, + state_timestamp=next(ts_iter)) + self._put_shard_range(sr_dopey) + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'dopey%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(0)]) + else: + assert_not_redirected(obj_name, headers=headers) + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(0)]) + obj_name = 'dopey%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(0)]) + + # further updates to bashful and dopey are now redirected... + assert_redirected('bashful', sr_dopey, headers=headers) + assert_redirected('dopey', sr_dopey, headers=headers) + # ...and existing updates in this container are *not* updated + self.assertEqual([ts_bashful_orig.internal], + [obj['created_at'] for obj in get_listing(0) + if obj['name'] == 'bashful']) + + # set broker to sharded state + self.assertTrue(broker.set_sharded_state()) + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'grumpiest%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + else: + assert_not_redirected(obj_name, headers=headers) + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + obj_name = 'grumpiest%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + + def test_PUT_object_update_redirected_to_shard(self): + self._check_object_update_redirected_to_shard('PUT') + + def test_DELETE_object_update_redirected_to_shard(self): + self._check_object_update_redirected_to_shard('DELETE') + def test_GET_json(self): # make a container req = Request.blank( @@ -2389,7 +3621,7 @@ class TestContainerController(unittest.TestCase): req = Request.blank( '/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '0'}) - resp = req.get_response(self.controller) + req.get_response(self.controller) # fill the container for i in range(3): req = Request.blank( @@ -2407,6 +3639,24 @@ class TestContainerController(unittest.TestCase): resp = req.get_response(self.controller) result = resp.body.split() self.assertEqual(result, ['2', ]) + # test limit with end_marker + req = Request.blank('/sda1/p/a/c?limit=2&end_marker=1', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.controller) + result = resp.body.split() + self.assertEqual(result, 
['0', ]) + # test limit, reverse with end_marker + req = Request.blank('/sda1/p/a/c?limit=2&end_marker=1&reverse=True', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.controller) + result = resp.body.split() + self.assertEqual(result, ['2', ]) + # test marker > end_marker + req = Request.blank('/sda1/p/a/c?marker=2&end_marker=1', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.controller) + result = resp.body.split() + self.assertEqual(result, []) def test_weird_content_types(self): snowman = u'\u2603' diff --git a/test/unit/container/test_sharder.py b/test/unit/container/test_sharder.py new file mode 100644 index 0000000000..353d980bbf --- /dev/null +++ b/test/unit/container/test_sharder.py @@ -0,0 +1,4580 @@ +# Copyright (c) 2010-2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import hashlib +import json +import random + +import eventlet +import os +import shutil +from contextlib import contextmanager +from tempfile import mkdtemp + +import mock +import unittest + +from collections import defaultdict + +import time + +from copy import deepcopy + +from swift.common import internal_client +from swift.container import replicator +from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING, \ + SHARDED, DATADIR +from swift.container.sharder import ContainerSharder, sharding_enabled, \ + CleavingContext, DEFAULT_SHARD_SHRINK_POINT, \ + DEFAULT_SHARD_CONTAINER_THRESHOLD +from swift.common.utils import ShardRange, Timestamp, hash_path, \ + encode_timestamps, parse_db_filename, quorum_size, Everything +from test import annotate_failure + +from test.unit import FakeLogger, debug_logger, FakeRing, \ + make_timestamp_iter, unlink_files, mocked_http_conn, mock_timestamp_now, \ + attach_fake_replication_rpc + + +class BaseTestSharder(unittest.TestCase): + def setUp(self): + self.tempdir = mkdtemp() + self.ts_iter = make_timestamp_iter() + + def tearDown(self): + shutil.rmtree(self.tempdir, ignore_errors=True) + + def _assert_shard_ranges_equal(self, expected, actual): + self.assertEqual([dict(sr) for sr in expected], + [dict(sr) for sr in actual]) + + def _make_broker(self, account='a', container='c', epoch=None, + device='sda', part=0, hash_=None): + hash_ = hash_ or hashlib.md5(container).hexdigest() + datadir = os.path.join( + self.tempdir, device, 'containers', str(part), hash_[-3:], hash_) + if epoch: + filename = '%s_%s.db' % (hash, epoch) + else: + filename = hash_ + '.db' + db_file = os.path.join(datadir, filename) + broker = ContainerBroker( + db_file, account=account, container=container, + logger=debug_logger()) + broker.initialize() + return broker + + def _make_sharding_broker(self, account='a', container='c', + shard_bounds=(('', 'middle'), ('middle', ''))): + broker = self._make_broker(account=account, container=container) + broker.set_sharding_sysmeta('Root', 'a/c') + old_db_id = broker.get_info()['id'] + broker.enable_sharding(next(self.ts_iter)) + shard_ranges = self._make_shard_ranges( + shard_bounds, 
state=ShardRange.CLEAVED) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + broker = ContainerBroker(broker.db_file, account='a', container='c') + self.assertNotEqual(old_db_id, broker.get_info()['id']) # sanity check + return broker + + def _make_shard_ranges(self, bounds, state=None, object_count=0): + return [ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), + lower, upper, state=state, + object_count=object_count) + for lower, upper in bounds] + + def ts_encoded(self): + # make a unique timestamp string with multiple timestamps encoded; + # use different deltas between component timestamps + timestamps = [next(self.ts_iter) for i in range(4)] + return encode_timestamps( + timestamps[0], timestamps[1], timestamps[3]) + + +class TestSharder(BaseTestSharder): + def test_init(self): + def do_test(conf, expected): + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient') \ + as mock_ic: + with mock.patch('swift.common.db_replicator.ring.Ring') \ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + sharder = ContainerSharder(conf) + mock_ring.assert_called_once_with( + '/etc/swift', ring_name='container') + self.assertEqual( + 'container-sharder', sharder.logger.logger.name) + for k, v in expected.items(): + self.assertTrue(hasattr(sharder, k), 'Missing attr %s' % k) + self.assertEqual(v, getattr(sharder, k), + 'Incorrect value: expected %s=%s but got %s' % + (k, v, getattr(sharder, k))) + return mock_ic + + expected = { + 'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201, + 'per_diff': 1000, 'max_diffs': 100, 'interval': 30, + 'cleave_row_batch_size': 10000, + 'node_timeout': 10, 'conn_timeout': 5, + 'rsync_compress': False, + 'rsync_module': '{replication_ip}::container', + 'reclaim_age': 86400 * 7, + 'shard_shrink_point': 0.25, + 'shrink_merge_point': 0.75, + 'shard_container_threshold': 10000000, + 'split_size': 5000000, + 'cleave_batch_size': 2, + 'scanner_batch_size': 10, + 'rcache': '/var/cache/swift/container.recon', + 'shards_account_prefix': '.shards_', + 'auto_shard': False, + 'recon_candidates_limit': 5, + 'shard_replication_quorum': 2, + 'existing_shard_replication_quorum': 2 + } + mock_ic = do_test({}, expected) + mock_ic.assert_called_once_with( + '/etc/swift/internal-client.conf', 'Swift Container Sharder', 3, + allow_modify_pipeline=False) + + conf = { + 'mount_check': False, 'bind_ip': '10.11.12.13', 'bind_port': 62010, + 'per_diff': 2000, 'max_diffs': 200, 'interval': 60, + 'cleave_row_batch_size': 3000, + 'node_timeout': 20, 'conn_timeout': 1, + 'rsync_compress': True, + 'rsync_module': '{replication_ip}::container_sda/', + 'reclaim_age': 86400 * 14, + 'shard_shrink_point': 35, + 'shard_shrink_merge_point': 85, + 'shard_container_threshold': 20000000, + 'cleave_batch_size': 4, + 'shard_scanner_batch_size': 8, + 'request_tries': 2, + 'internal_client_conf_path': '/etc/swift/my-sharder-ic.conf', + 'recon_cache_path': '/var/cache/swift-alt', + 'auto_create_account_prefix': '...', + 'auto_shard': 'yes', + 'recon_candidates_limit': 10, + 'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0 + } + expected = { + 'mount_check': False, 'bind_ip': '10.11.12.13', 'port': 62010, + 'per_diff': 2000, 'max_diffs': 200, 'interval': 60, + 'cleave_row_batch_size': 3000, + 'node_timeout': 20, 'conn_timeout': 1, + 'rsync_compress': True, + 'rsync_module': '{replication_ip}::container_sda', + 'reclaim_age': 86400 * 14, + 'shard_shrink_point': 0.35, + 
'shrink_merge_point': 0.85, + 'shard_container_threshold': 20000000, + 'split_size': 10000000, + 'cleave_batch_size': 4, + 'scanner_batch_size': 8, + 'rcache': '/var/cache/swift-alt/container.recon', + 'shards_account_prefix': '...shards_', + 'auto_shard': True, + 'recon_candidates_limit': 10, + 'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0 + } + mock_ic = do_test(conf, expected) + mock_ic.assert_called_once_with( + '/etc/swift/my-sharder-ic.conf', 'Swift Container Sharder', 2, + allow_modify_pipeline=False) + + expected.update({'shard_replication_quorum': 3, + 'existing_shard_replication_quorum': 3}) + conf.update({'shard_replication_quorum': 4, + 'existing_shard_replication_quorum': 4}) + do_test(conf, expected) + + with self.assertRaises(ValueError) as cm: + do_test({'shard_shrink_point': 101}, {}) + self.assertIn( + 'greater than 0, less than 100, not "101"', cm.exception.message) + self.assertIn('shard_shrink_point', cm.exception.message) + + with self.assertRaises(ValueError) as cm: + do_test({'shard_shrink_merge_point': 101}, {}) + self.assertIn( + 'greater than 0, less than 100, not "101"', cm.exception.message) + self.assertIn('shard_shrink_merge_point', cm.exception.message) + + def test_init_internal_client_conf_loading_error(self): + with mock.patch('swift.common.db_replicator.ring.Ring') \ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + with self.assertRaises(SystemExit) as cm: + ContainerSharder( + {'internal_client_conf_path': + os.path.join(self.tempdir, 'nonexistent')}) + self.assertIn('Unable to load internal client', str(cm.exception)) + + with mock.patch('swift.common.db_replicator.ring.Ring') \ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient', + side_effect=Exception('kaboom')): + with self.assertRaises(Exception) as cm: + ContainerSharder({}) + self.assertIn('kaboom', str(cm.exception)) + + def _assert_stats(self, expected, sharder, category): + # assertEqual doesn't work with a defaultdict + stats = sharder.stats['sharding'][category] + for k, v in expected.items(): + actual = stats[k] + self.assertEqual( + v, actual, 'Expected %s but got %s for %s in %s' % + (v, actual, k, stats)) + return stats + + def _assert_recon_stats(self, expected, sharder, category): + with open(sharder.rcache, 'rb') as fd: + recon = json.load(fd) + stats = recon['sharding_stats']['sharding'].get(category) + self.assertEqual(expected, stats) + + def test_increment_stats(self): + with self._mock_sharder() as sharder: + sharder._increment_stat('visited', 'success') + sharder._increment_stat('visited', 'success') + sharder._increment_stat('visited', 'failure') + sharder._increment_stat('visited', 'completed') + sharder._increment_stat('cleaved', 'success') + sharder._increment_stat('scanned', 'found', step=4) + expected = {'success': 2, + 'failure': 1, + 'completed': 1} + self._assert_stats(expected, sharder, 'visited') + self._assert_stats({'success': 1}, sharder, 'cleaved') + self._assert_stats({'found': 4}, sharder, 'scanned') + + def test_increment_stats_with_statsd(self): + with self._mock_sharder() as sharder: + sharder._increment_stat('visited', 'success', statsd=True) + sharder._increment_stat('visited', 'success', statsd=True) + sharder._increment_stat('visited', 'failure', statsd=True) + sharder._increment_stat('visited', 'failure', statsd=False) + 
sharder._increment_stat('visited', 'completed') + expected = {'success': 2, + 'failure': 2, + 'completed': 1} + self._assert_stats(expected, sharder, 'visited') + counts = sharder.logger.get_increment_counts() + self.assertEqual(2, counts.get('visited_success')) + self.assertEqual(1, counts.get('visited_failure')) + self.assertIsNone(counts.get('visited_completed')) + + def test_run_forever(self): + conf = {'recon_cache_path': self.tempdir, + 'devices': self.tempdir} + with self._mock_sharder(conf) as sharder: + sharder._check_node = lambda *args: True + sharder.logger.clear() + brokers = [] + for container in ('c1', 'c2'): + broker = self._make_broker( + container=container, hash_=container + 'hash', + device=sharder.ring.devs[0]['device'], part=0) + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('true', next(self.ts_iter).internal)}) + brokers.append(broker) + + fake_stats = { + 'scanned': {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 2, 'min_time': 99, 'max_time': 123}, + 'created': {'attempted': 1, 'success': 1, 'failure': 1}, + 'cleaved': {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': 0.01, 'max_time': 1.3}, + 'misplaced': {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 1, 'unplaced': 0}, + 'audit_root': {'attempted': 5, 'success': 4, 'failure': 1}, + 'audit_shard': {'attempted': 2, 'success': 2, 'failure': 0}, + } + # NB these are time increments not absolute times... + fake_periods = [1, 2, 3, 3600, 4, 15, 15, 0] + fake_periods_iter = iter(fake_periods) + recon_data = [] + fake_process_broker_calls = [] + + def mock_dump_recon_cache(data, *args): + recon_data.append(deepcopy(data)) + + with mock.patch('swift.container.sharder.time.time') as fake_time: + def fake_process_broker(broker, *args, **kwargs): + # increment time and inject some fake stats + fake_process_broker_calls.append((broker, args, kwargs)) + try: + fake_time.return_value += next(fake_periods_iter) + except StopIteration: + # bail out + fake_time.side_effect = Exception('Test over') + sharder.stats['sharding'].update(fake_stats) + + with mock.patch( + 'swift.container.sharder.time.sleep') as mock_sleep: + with mock.patch( + 'swift.container.sharder.is_sharding_candidate', + return_value=True): + with mock.patch( + 'swift.container.sharder.dump_recon_cache', + mock_dump_recon_cache): + fake_time.return_value = next(fake_periods_iter) + sharder._is_sharding_candidate = lambda x: True + sharder._process_broker = fake_process_broker + with self.assertRaises(Exception) as cm: + sharder.run_forever() + + self.assertEqual('Test over', cm.exception.message) + # four cycles are started, two brokers visited per cycle, but + # fourth never completes + self.assertEqual(8, len(fake_process_broker_calls)) + # expect initial random sleep then one sleep between first and + # second pass + self.assertEqual(2, mock_sleep.call_count) + self.assertLessEqual(mock_sleep.call_args_list[0][0][0], 30) + self.assertLessEqual(mock_sleep.call_args_list[1][0][0], + 30 - fake_periods[0]) + + lines = sharder.logger.get_lines_for_level('info') + categories = ('visited', 'scanned', 'created', 'cleaved', + 'misplaced', 'audit_root', 'audit_shard') + + def check_categories(start_time): + for category in categories: + line = lines.pop(0) + self.assertIn('Since %s' % time.ctime(start_time), line) + self.assertIn(category, line) + for k, v in fake_stats.get(category, {}).items(): + self.assertIn('%s:%s' % (k, v), line) + + def check_logs(cycle_time, start_time, + expect_periodic_stats=False): + 
self.assertIn('Container sharder cycle starting', lines.pop(0)) + check_categories(start_time) + if expect_periodic_stats: + check_categories(start_time) + self.assertIn('Container sharder cycle completed: %.02fs' % + cycle_time, lines.pop(0)) + + check_logs(sum(fake_periods[1:3]), fake_periods[0]) + check_logs(sum(fake_periods[3:5]), sum(fake_periods[:3]), + expect_periodic_stats=True) + check_logs(sum(fake_periods[5:7]), sum(fake_periods[:5])) + # final cycle start but then exception pops to terminate test + self.assertIn('Container sharder cycle starting', lines.pop(0)) + self.assertFalse(lines) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn( + 'Unhandled exception while dumping progress', lines[0]) + self.assertIn('Test over', lines[0]) + + def check_recon(data, time, last, expected_stats): + self.assertEqual(time, data['sharding_time']) + self.assertEqual(last, data['sharding_last']) + self.assertEqual( + expected_stats, dict(data['sharding_stats']['sharding'])) + + def stats_for_candidate(broker): + return {'object_count': 0, + 'account': broker.account, + 'meta_timestamp': mock.ANY, + 'container': broker.container, + 'file_size': os.stat(broker.db_file).st_size, + 'path': broker.db_file, + 'root': broker.path, + 'node_index': 0} + + self.assertEqual(4, len(recon_data)) + # stats report at end of first cycle + fake_stats.update({'visited': {'attempted': 2, 'skipped': 0, + 'success': 2, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 2, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[:2]] + } + }) + check_recon(recon_data[0], sum(fake_periods[1:3]), + sum(fake_periods[:3]), fake_stats) + # periodic stats report after first broker has been visited during + # second cycle - one candidate identified so far this cycle + fake_stats.update({'visited': {'attempted': 1, 'skipped': 0, + 'success': 1, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 1, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[2:3]] + } + }) + check_recon(recon_data[1], fake_periods[3], + sum(fake_periods[:4]), fake_stats) + # stats report at end of second cycle - both candidates reported + fake_stats.update({'visited': {'attempted': 2, 'skipped': 0, + 'success': 2, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 2, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[2:4]] + } + }) + check_recon(recon_data[2], sum(fake_periods[3:5]), + sum(fake_periods[:5]), fake_stats) + # stats report at end of third cycle + fake_stats.update({'visited': {'attempted': 2, 'skipped': 0, + 'success': 2, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 2, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[4:6]] + } + }) + check_recon(recon_data[3], sum(fake_periods[5:7]), + sum(fake_periods[:7]), fake_stats) + + def test_one_shard_cycle(self): + conf = {'recon_cache_path': self.tempdir, + 'devices': self.tempdir, + 'shard_container_threshold': 9} + with self._mock_sharder(conf) as sharder: + sharder._check_node = lambda *args: True + sharder.reported = time.time() + sharder.logger = debug_logger() + brokers = [] + device_ids = set(range(3)) + for device_id in device_ids: + brokers.append(self._make_broker( + container='c%s' % device_id, hash_='c%shash' % device_id, + device=sharder.ring.devs[device_id]['device'], part=0)) + 
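An illustrative sketch (not part of the diff) of the opt-in that the next few lines exercise: a container only becomes eligible for auto-sharding once the X-Container-Sysmeta-Sharding flag is set, which is what sharding_enabled() checks. This assumes a Swift development install; the in-memory broker is used purely for illustration.

from swift.common.utils import Timestamp
from swift.container.backend import ContainerBroker
from swift.container.sharder import sharding_enabled

broker = ContainerBroker(':memory:', account='a', container='c')
broker.initialize()
assert not sharding_enabled(broker)  # no sysmeta flag, no shard ranges yet
broker.update_metadata(
    {'X-Container-Sysmeta-Sharding': ('true', Timestamp.now().internal)})
assert sharding_enabled(broker)      # flag set, container may now be sharded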
# enable a/c2 and a/c3 for sharding + for broker in brokers[1:]: + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('true', next(self.ts_iter).internal)}) + # make a/c2 a candidate for sharding + for i in range(10): + brokers[1].put_object('o%s' % i, next(self.ts_iter).internal, + 0, 'text/plain', 'etag', 0) + + # check only sharding enabled containers are processed + with mock.patch.object( + sharder, '_process_broker' + ) as mock_process_broker: + sharder._local_device_ids = {'stale_node_id'} + sharder._one_shard_cycle(Everything(), Everything()) + + self.assertEqual(device_ids, sharder._local_device_ids) + self.assertEqual(2, mock_process_broker.call_count) + processed_paths = [call[0][0].path + for call in mock_process_broker.call_args_list] + self.assertEqual({'a/c1', 'a/c2'}, set(processed_paths)) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + expected_stats = {'attempted': 2, 'success': 2, 'failure': 0, + 'skipped': 1, 'completed': 0} + self._assert_recon_stats(expected_stats, sharder, 'visited') + expected_candidate_stats = { + 'found': 1, + 'top': [{'object_count': 10, 'account': 'a', 'container': 'c1', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[1].db_file).st_size, + 'path': brokers[1].db_file, 'root': 'a/c1', + 'node_index': 1}]} + self._assert_recon_stats( + expected_candidate_stats, sharder, 'sharding_candidates') + self._assert_recon_stats(None, sharder, 'sharding_progress') + + # enable and progress container a/c1 by giving it shard ranges + now = next(self.ts_iter) + brokers[0].merge_shard_ranges( + [ShardRange('a/c0', now, '', '', state=ShardRange.SHARDING), + ShardRange('.s_a/1', now, '', 'b', state=ShardRange.ACTIVE), + ShardRange('.s_a/2', now, 'b', 'c', state=ShardRange.CLEAVED), + ShardRange('.s_a/3', now, 'c', 'd', state=ShardRange.CREATED), + ShardRange('.s_a/4', now, 'd', 'e', state=ShardRange.CREATED), + ShardRange('.s_a/5', now, 'e', '', state=ShardRange.FOUND)]) + brokers[1].merge_shard_ranges( + [ShardRange('a/c1', now, '', '', state=ShardRange.SHARDING), + ShardRange('.s_a/6', now, '', 'b', state=ShardRange.ACTIVE), + ShardRange('.s_a/7', now, 'b', 'c', state=ShardRange.ACTIVE), + ShardRange('.s_a/8', now, 'c', 'd', state=ShardRange.CLEAVED), + ShardRange('.s_a/9', now, 'd', 'e', state=ShardRange.CREATED), + ShardRange('.s_a/0', now, 'e', '', state=ShardRange.CREATED)]) + for i in range(11): + brokers[2].put_object('o%s' % i, next(self.ts_iter).internal, + 0, 'text/plain', 'etag', 0) + + def mock_processing(broker, node, part): + if broker.path == 'a/c1': + raise Exception('kapow!') + elif broker.path not in ('a/c0', 'a/c2'): + raise BaseException("I don't know how to handle a broker " + "for %s" % broker.path) + + # check exceptions are handled + with mock.patch.object( + sharder, '_process_broker', side_effect=mock_processing + ) as mock_process_broker: + sharder._local_device_ids = {'stale_node_id'} + sharder._one_shard_cycle(Everything(), Everything()) + + self.assertEqual(device_ids, sharder._local_device_ids) + self.assertEqual(3, mock_process_broker.call_count) + processed_paths = [call[0][0].path + for call in mock_process_broker.call_args_list] + self.assertEqual({'a/c0', 'a/c1', 'a/c2'}, set(processed_paths)) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn('Unhandled exception while processing', lines[0]) + self.assertFalse(lines[1:]) + sharder.logger.clear() + expected_stats = {'attempted': 3, 'success': 2, 'failure': 1, + 'skipped': 0, 'completed': 0} + 
self._assert_recon_stats(expected_stats, sharder, 'visited') + expected_candidate_stats = { + 'found': 1, + 'top': [{'object_count': 11, 'account': 'a', 'container': 'c2', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[1].db_file).st_size, + 'path': brokers[2].db_file, 'root': 'a/c2', + 'node_index': 2}]} + self._assert_recon_stats( + expected_candidate_stats, sharder, 'sharding_candidates') + expected_in_progress_stats = { + 'all': [{'object_count': 0, 'account': 'a', 'container': 'c0', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[0].db_file).st_size, + 'path': brokers[0].db_file, 'root': 'a/c0', + 'node_index': 0, + 'found': 1, 'created': 2, 'cleaved': 1, 'active': 1, + 'state': 'sharding', 'db_state': 'unsharded', + 'error': None}, + {'object_count': 10, 'account': 'a', 'container': 'c1', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[1].db_file).st_size, + 'path': brokers[1].db_file, 'root': 'a/c1', + 'node_index': 1, + 'found': 0, 'created': 2, 'cleaved': 1, 'active': 2, + 'state': 'sharding', 'db_state': 'unsharded', + 'error': 'kapow!'}]} + self._assert_stats( + expected_in_progress_stats, sharder, 'sharding_in_progress') + + # check that candidates and in progress stats don't stick in recon + own_shard_range = brokers[0].get_own_shard_range() + own_shard_range.state = ShardRange.ACTIVE + brokers[0].merge_shard_ranges([own_shard_range]) + for i in range(10): + brokers[1].delete_object( + 'o%s' % i, next(self.ts_iter).internal) + with mock.patch.object( + sharder, '_process_broker' + ) as mock_process_broker: + sharder._local_device_ids = {999} + sharder._one_shard_cycle(Everything(), Everything()) + + self.assertEqual(device_ids, sharder._local_device_ids) + self.assertEqual(3, mock_process_broker.call_count) + processed_paths = [call[0][0].path + for call in mock_process_broker.call_args_list] + self.assertEqual({'a/c0', 'a/c1', 'a/c2'}, set(processed_paths)) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + expected_stats = {'attempted': 3, 'success': 3, 'failure': 0, + 'skipped': 0, 'completed': 0} + self._assert_recon_stats(expected_stats, sharder, 'visited') + self._assert_recon_stats( + expected_candidate_stats, sharder, 'sharding_candidates') + self._assert_recon_stats(None, sharder, 'sharding_progress') + + @contextmanager + def _mock_sharder(self, conf=None, replicas=3): + conf = conf or {} + conf['devices'] = self.tempdir + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient'): + with mock.patch( + 'swift.common.db_replicator.ring.Ring', + lambda *args, **kwargs: FakeRing(replicas=replicas)): + sharder = ContainerSharder(conf, logger=FakeLogger()) + sharder._local_device_ids = {0, 1, 2} + sharder._replicate_object = mock.MagicMock( + return_value=(True, [True] * sharder.ring.replica_count)) + yield sharder + + def _get_raw_object_records(self, broker): + # use list_objects_iter with no-op transform_func to get back actual + # un-transformed rows with encoded timestamps + return [list(obj) for obj in broker.list_objects_iter( + 10, '', '', '', '', include_deleted=None, all_policies=True, + transform_func=lambda record: record)] + + def _check_objects(self, expected_objs, shard_db): + shard_broker = ContainerBroker(shard_db) + shard_objs = self._get_raw_object_records(shard_broker) + expected_objs = [list(obj) for obj in expected_objs] + self.assertEqual(expected_objs, shard_objs) + + def _check_shard_range(self, expected, actual): + expected_dict = dict(expected) + actual_dict = dict(actual) 
+ self.assertGreater(actual_dict.pop('meta_timestamp'), + expected_dict.pop('meta_timestamp')) + self.assertEqual(expected_dict, actual_dict) + + def test_fetch_shard_ranges_unexpected_response(self): + broker = self._make_broker() + exc = internal_client.UnexpectedResponse( + 'Unexpected response: 404', None) + with self._mock_sharder() as sharder: + sharder.int_client.make_request.side_effect = exc + self.assertIsNone(sharder._fetch_shard_ranges(broker)) + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Unexpected response: 404', lines[0]) + self.assertFalse(lines[1:]) + + def test_fetch_shard_ranges_bad_record_type(self): + def do_test(mock_resp_headers): + with self._mock_sharder() as sharder: + mock_make_request = mock.MagicMock( + return_value=mock.MagicMock(headers=mock_resp_headers)) + sharder.int_client.make_request = mock_make_request + self.assertIsNone(sharder._fetch_shard_ranges(broker)) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn('unexpected record type', lines[0]) + self.assertFalse(lines[1:]) + + broker = self._make_broker() + do_test({}) + do_test({'x-backend-record-type': 'object'}) + do_test({'x-backend-record-type': 'disco'}) + + def test_fetch_shard_ranges_bad_data(self): + def do_test(mock_resp_body): + mock_resp_headers = {'x-backend-record-type': 'shard'} + with self._mock_sharder() as sharder: + mock_make_request = mock.MagicMock( + return_value=mock.MagicMock(headers=mock_resp_headers, + body=mock_resp_body)) + sharder.int_client.make_request = mock_make_request + self.assertIsNone(sharder._fetch_shard_ranges(broker)) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn('invalid data', lines[0]) + self.assertFalse(lines[1:]) + + broker = self._make_broker() + do_test({}) + do_test('') + do_test(json.dumps({})) + do_test(json.dumps([{'account': 'a', 'container': 'c'}])) + + def test_fetch_shard_ranges_ok(self): + def do_test(mock_resp_body, params): + mock_resp_headers = {'x-backend-record-type': 'shard'} + with self._mock_sharder() as sharder: + mock_make_request = mock.MagicMock( + return_value=mock.MagicMock(headers=mock_resp_headers, + body=mock_resp_body)) + sharder.int_client.make_request = mock_make_request + mock_make_path = mock.MagicMock(return_value='/v1/a/c') + sharder.int_client.make_path = mock_make_path + actual = sharder._fetch_shard_ranges(broker, params=params) + sharder.int_client.make_path.assert_called_once_with('a', 'c') + self.assertFalse(sharder.logger.get_lines_for_level('error')) + return actual, mock_make_request + + expected_headers = {'X-Backend-Record-Type': 'shard', + 'X-Backend-Include-Deleted': 'False', + 'X-Backend-Override-Deleted': 'true'} + broker = self._make_broker() + shard_ranges = self._make_shard_ranges((('', 'm'), ('m', ''))) + + params = {'format': 'json'} + actual, mock_call = do_test(json.dumps([dict(shard_ranges[0])]), + params={}) + mock_call.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + self._assert_shard_ranges_equal([shard_ranges[0]], actual) + + params = {'format': 'json', 'includes': 'thing'} + actual, mock_call = do_test( + json.dumps([dict(sr) for sr in shard_ranges]), params=params) + self._assert_shard_ranges_equal(shard_ranges, actual) + mock_call.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + params = {'format': 'json', 'end_marker': 'there', 'marker': 'here'} + actual, mock_call = do_test(json.dumps([]), 
params=params) + self._assert_shard_ranges_equal([], actual) + mock_call.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + def _check_cleave_root(self, conf=None): + broker = self._make_broker() + objects = [ + # shard 0 + ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0), + ('here', self.ts_encoded(), 10, 'text/plain', 'etag_here', 0, 0), + # shard 1 + ('m', self.ts_encoded(), 1, 'text/plain', 'etag_m', 0, 0), + ('n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0), + ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0), + # shard 2 + ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0), + # shard 3 + ('x', self.ts_encoded(), 0, '', '', 1, 0), # deleted + ('y', self.ts_encoded(), 1000, 'text/plain', 'etag_y', 0, 0), + # shard 4 + ('yyyy', self.ts_encoded(), 14, 'text/plain', 'etag_yyyy', 0, 0), + ] + for obj in objects: + broker.put_object(*obj) + initial_root_info = broker.get_info() + broker.enable_sharding(Timestamp.now()) + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', 'yonder'), + ('yonder', '')) + shard_ranges = self._make_shard_ranges(shard_bounds) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + # used to accumulate stats from sharded dbs + total_shard_stats = {'object_count': 0, 'bytes_used': 0} + # run cleave - no shard ranges, nothing happens + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(0, context.ranges_done) + self.assertEqual(0, context.ranges_todo) + + self.assertEqual(UNSHARDED, broker.get_db_state()) + sharder._replicate_object.assert_not_called() + for db in expected_shard_dbs: + with annotate_failure(db): + self.assertFalse(os.path.exists(db)) + + # run cleave - all shard ranges in found state, nothing happens + broker.merge_shard_ranges(shard_ranges[:4]) + self.assertTrue(broker.set_sharding_state()) + + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(0, context.ranges_done) + self.assertEqual(4, context.ranges_todo) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_not_called() + for db in expected_shard_dbs: + with annotate_failure(db): + self.assertFalse(os.path.exists(db)) + for shard_range in broker.get_shard_ranges(): + with annotate_failure(shard_range): + self.assertEqual(ShardRange.FOUND, shard_range.state) + + # move first shard range to created state, first shard range is cleaved + shard_ranges[0].update_state(ShardRange.CREATED) + broker.merge_shard_ranges(shard_ranges[:1]) + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + expected = {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': mock.ANY, 'max_time': 
mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[0], 0) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + # update expected state and metadata, check cleaved shard range + shard_ranges[0].bytes_used = 20 + shard_ranges[0].object_count = 2 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(objects[:2], expected_shard_dbs[0]) + # other shard ranges should be unchanged + for i in range(1, len(shard_ranges)): + with annotate_failure(i): + self.assertFalse(os.path.exists(expected_shard_dbs[i])) + for i in range(1, len(updated_shard_ranges)): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_ranges[i])) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('here', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(1, context.ranges_done) + self.assertEqual(3, context.ranges_todo) + + unlink_files(expected_shard_dbs) + + # move more shard ranges to created state + for i in range(1, 4): + shard_ranges[i].update_state(ShardRange.CREATED) + broker.merge_shard_ranges(shard_ranges[1:4]) + + # replication of next shard range is not sufficiently successful + with self._mock_sharder(conf=conf) as sharder: + quorum = quorum_size(sharder.ring.replica_count) + successes = [True] * (quorum - 1) + fails = [False] * (sharder.ring.replica_count - len(successes)) + responses = successes + fails + random.shuffle(responses) + sharder._replicate_object = mock.MagicMock( + side_effect=((False, responses),)) + self.assertFalse(sharder._cleave(broker)) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + + # cleaving state is unchanged + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + for i in range(1, len(updated_shard_ranges)): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_ranges[i])) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('here', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(1, context.ranges_done) + self.assertEqual(3, context.ranges_todo) + + # try again, this time replication is sufficiently successful + with self._mock_sharder(conf=conf) as sharder: + successes = [True] * quorum + fails = [False] * (sharder.ring.replica_count - len(successes)) + responses1 = successes + fails + responses2 = fails + successes + sharder._replicate_object = mock.MagicMock( + side_effect=((False, responses1), (False, responses2))) + 
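The mocked responses above stand in for per-replica results from _replicate_object(); whether a cleaved shard counts as sufficiently replicated comes down to comparing the number of successful replicas against a quorum (the shard_replication_quorum / existing_shard_replication_quorum settings, defaulting to a majority). A rough, illustrative sketch of that decision follows; the helper name is invented here, only quorum_size() is Swift's own.

from swift.common.utils import quorum_size

def enough_replicas(responses, replica_count, configured_quorum=None):
    # responses: list of per-replica success booleans
    needed = (quorum_size(replica_count) if configured_quorum is None
              else configured_quorum)
    return sum(1 for ok in responses if ok) >= needed

assert enough_replicas([True, True, False], 3)       # 2 >= quorum_size(3) == 2
assert not enough_replicas([True, False, False], 3)  # only 1 success
assert enough_replicas([True, False, False], 3, configured_quorum=1)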
self.assertFalse(sharder._cleave(broker)) + + expected = {'attempted': 2, 'success': 2, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in expected_shard_dbs[1:3]] + ) + for db in expected_shard_dbs[1:3]: + shard_broker = ContainerBroker(db) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + + # only 2 are cleaved per batch + # update expected state and metadata, check cleaved shard ranges + shard_ranges[1].bytes_used = 6 + shard_ranges[1].object_count = 3 + shard_ranges[1].state = ShardRange.CLEAVED + shard_ranges[2].bytes_used = 100 + shard_ranges[2].object_count = 1 + shard_ranges[2].state = ShardRange.CLEAVED + for i in range(0, 3): + with annotate_failure(i): + self._check_shard_range( + shard_ranges[i], updated_shard_ranges[i]) + self._check_objects(objects[2:5], expected_shard_dbs[1]) + self._check_objects(objects[5:6], expected_shard_dbs[2]) + # other shard ranges should be unchanged + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + for i, db in enumerate(expected_shard_dbs[3:], 3): + with annotate_failure(i): + self.assertFalse(os.path.exists(db)) + for i, updated_shard_range in enumerate(updated_shard_ranges[3:], 3): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_range)) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('where', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(3, context.ranges_done) + self.assertEqual(1, context.ranges_todo) + + unlink_files(expected_shard_dbs) + + # run cleave again - should process the fourth range + with self._mock_sharder(conf=conf) as sharder: + sharder.logger = debug_logger() + self.assertFalse(sharder._cleave(broker)) + + expected = {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[3], 0) + shard_broker = ContainerBroker(expected_shard_dbs[3]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + + shard_ranges[3].bytes_used = 1000 + shard_ranges[3].object_count = 1 + shard_ranges[3].state = ShardRange.CLEAVED + for i in range(0, 4): + 
with annotate_failure(i): + self._check_shard_range( + shard_ranges[i], updated_shard_ranges[i]) + # NB includes the deleted object + self._check_objects(objects[6:8], expected_shard_dbs[3]) + # other shard ranges should be unchanged + for i, db in enumerate(expected_shard_dbs[:3]): + with annotate_failure(i): + self.assertFalse(os.path.exists(db)) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + for i, updated_shard_range in enumerate(updated_shard_ranges[4:], 4): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_range)) + + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('yonder', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(4, context.ranges_done) + self.assertEqual(0, context.ranges_todo) + + unlink_files(expected_shard_dbs) + + # run cleave - should be a no-op, all existing ranges have been cleaved + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_not_called() + + # add final shard range - move this to ACTIVE state and update stats to + # simulate another replica having cleaved it and replicated its state + shard_ranges[4].update_state(ShardRange.ACTIVE) + shard_ranges[4].update_meta(2, 15) + broker.merge_shard_ranges(shard_ranges[4:]) + + with self._mock_sharder(conf=conf) as sharder: + self.assertTrue(sharder._cleave(broker)) + + expected = {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[4], 0) + shard_broker = ContainerBroker(expected_shard_dbs[4]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.ACTIVE, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(5, len(updated_shard_ranges)) + # NB stats of the ACTIVE shard range should not be reset by cleaving + for i in range(0, 4): + with annotate_failure(i): + self._check_shard_range( + shard_ranges[i], updated_shard_ranges[i]) + self.assertEqual(dict(shard_ranges[4]), dict(updated_shard_ranges[4])) + + # object copied to shard + self._check_objects(objects[8:], expected_shard_dbs[4]) + # other shard ranges should be unchanged + for i, db in enumerate(expected_shard_dbs[:4]): + with annotate_failure(i): + self.assertFalse(os.path.exists(db)) + + self.assertEqual(initial_root_info['object_count'], + total_shard_stats['object_count']) + self.assertEqual(initial_root_info['bytes_used'], + total_shard_stats['bytes_used']) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(5, context.ranges_done) + self.assertEqual(0, context.ranges_todo) + + with 
self._mock_sharder(conf=conf) as sharder: + self.assertTrue(sharder._cleave(broker)) + sharder._replicate_object.assert_not_called() + + self.assertTrue(broker.set_sharded_state()) + # run cleave - should be a no-op + with self._mock_sharder(conf=conf) as sharder: + self.assertTrue(sharder._cleave(broker)) + + sharder._replicate_object.assert_not_called() + + def test_cleave_root(self): + self._check_cleave_root() + + def test_cleave_root_listing_limit_one(self): + # force yield_objects to update its marker and call to the broker's + # get_objects() for each shard range, to check the marker moves on + self._check_cleave_root(conf={'cleave_row_batch_size': 1}) + + def test_cleave_root_ranges_change(self): + # verify that objects are not missed if shard ranges change between + # cleaving batches + broker = self._make_broker() + objects = [ + ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0), + ('b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0), + ('c', self.ts_encoded(), 1, 'text/plain', 'etag_c', 0, 0), + ('d', self.ts_encoded(), 2, 'text/plain', 'etag_d', 0, 0), + ('e', self.ts_encoded(), 3, 'text/plain', 'etag_e', 0, 0), + ('f', self.ts_encoded(), 100, 'text/plain', 'etag_f', 0, 0), + ('x', self.ts_encoded(), 0, '', '', 1, 0), # deleted + ('z', self.ts_encoded(), 1000, 'text/plain', 'etag_z', 0, 0) + ] + for obj in objects: + broker.put_object(*obj) + broker.enable_sharding(Timestamp.now()) + + shard_bounds = (('', 'd'), ('d', 'x'), ('x', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + broker.merge_shard_ranges(shard_ranges[:3]) + self.assertTrue(broker.set_sharding_state()) + + # run cleave - first batch is cleaved + with self._mock_sharder() as sharder: + self.assertFalse(sharder._cleave(broker)) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual(str(shard_ranges[1].upper), context.cursor) + self.assertEqual(8, context.cleave_to_row) + self.assertEqual(8, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in expected_shard_dbs[:2]] + ) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(3, len(updated_shard_ranges)) + + # first 2 shard ranges should have updated object count, bytes used and + # meta_timestamp + shard_ranges[0].bytes_used = 23 + shard_ranges[0].object_count = 4 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + shard_ranges[1].bytes_used = 103 + shard_ranges[1].object_count = 2 + shard_ranges[1].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[1], updated_shard_ranges[1]) + self._check_objects(objects[:4], expected_shard_dbs[0]) + self._check_objects(objects[4:7], expected_shard_dbs[1]) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # third shard range should be unchanged - not yet cleaved + self.assertEqual(dict(shard_ranges[2]), + dict(updated_shard_ranges[2])) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual(str(shard_ranges[1].upper), context.cursor) + 
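The cursor and row assertions here and just below track CleavingContext, which checkpoints cleaving progress in the broker's sysmeta so an interrupted pass can resume where it left off: cursor records the upper bound of the last range cleaved, while max_row and cleave_to_row pin the object rows this pass is responsible for. A small illustrative round-trip (not part of the diff; assumes a Swift development install and that load() returns a fresh context when none has been stored):

from swift.container.backend import ContainerBroker
from swift.container.sharder import CleavingContext

broker = ContainerBroker(':memory:', account='a', container='c')
broker.initialize()
ctx = CleavingContext.load(broker)   # fresh default context for this db
ctx.cursor = 'd'                     # upper bound of the last cleaved range
ctx.ranges_done = 1
ctx.store(broker)                    # persisted via the broker's sysmeta
assert CleavingContext.load(broker).cursor == 'd'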
self.assertEqual(8, context.cleave_to_row) + self.assertEqual(8, context.max_row) + + # now change the shard ranges so that third consumes second + shard_ranges[1].set_deleted() + shard_ranges[2].lower = 'd' + shard_ranges[2].timestamp = Timestamp.now() + + broker.merge_shard_ranges(shard_ranges[1:3]) + + # run cleave - should process the extended third (final) range + with self._mock_sharder() as sharder: + self.assertTrue(sharder._cleave(broker)) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[2], 0) + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(2, len(updated_shard_ranges)) + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + # third shard range should now have updated object count, bytes used, + # including objects previously in the second shard range + shard_ranges[2].bytes_used = 1103 + shard_ranges[2].object_count = 3 + shard_ranges[2].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[2], updated_shard_ranges[1]) + self._check_objects(objects[4:8], expected_shard_dbs[2]) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual(str(shard_ranges[2].upper), context.cursor) + self.assertEqual(8, context.cleave_to_row) + self.assertEqual(8, context.max_row) + + def test_cleave_shard(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + own_shard_range = ShardRange( + broker.path, Timestamp.now(), 'here', 'where', + state=ShardRange.SHARDING, epoch=Timestamp.now()) + broker.merge_shard_ranges([own_shard_range]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertFalse(broker.is_root_container()) # sanity check + + objects = [ + ('m', self.ts_encoded(), 1, 'text/plain', 'etag_m', 0, 0), + ('n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0), + ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0), + ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0), + ] + misplaced_objects = [ + ('a', self.ts_encoded(), 1, 'text/plain', 'etag_a', 0, 0), + ('z', self.ts_encoded(), 100, 'text/plain', 'etag_z', 1, 0), + ] + for obj in objects + misplaced_objects: + broker.put_object(*obj) + + shard_bounds = (('here', 'there'), + ('there', 'where')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + misplaced_bounds = (('', 'here'), + ('where', '')) + misplaced_ranges = self._make_shard_ranges( + misplaced_bounds, state=ShardRange.ACTIVE) + misplaced_dbs = [] + for shard_range in misplaced_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + misplaced_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + + # run cleave - first range is cleaved but move of misplaced objects is + # not successful + sharder_conf = {'cleave_batch_size': 1} + with self._mock_sharder(sharder_conf) as sharder: + with mock.patch.object( + sharder, '_make_shard_range_fetcher', + return_value=lambda: iter(misplaced_ranges)): + # cause misplaced objects replication to not succeed + quorum = 
quorum_size(sharder.ring.replica_count) + successes = [True] * (quorum - 1) + fails = [False] * (sharder.ring.replica_count - len(successes)) + responses = successes + fails + random.shuffle(responses) + bad_result = (False, responses) + ok_result = (True, [True] * sharder.ring.replica_count) + sharder._replicate_object = mock.MagicMock( + # result for misplaced, misplaced, cleave + side_effect=(bad_result, ok_result, ok_result)) + self.assertFalse(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertFalse(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual(str(shard_ranges[0].upper), context.cursor) + self.assertEqual(6, context.cleave_to_row) + self.assertEqual(6, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, misplaced_dbs[0], 0), + mock.call(0, misplaced_dbs[1], 0), + mock.call(0, expected_shard_dbs[0], 0)]) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + # NB cleaving a shard, state goes to CLEAVED not ACTIVE + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(2, len(updated_shard_ranges)) + + # first shard range should have updated object count, bytes used and + # meta_timestamp + shard_ranges[0].bytes_used = 6 + shard_ranges[0].object_count = 3 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(objects[:3], expected_shard_dbs[0]) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self._check_objects(misplaced_objects[:1], misplaced_dbs[0]) + self._check_objects(misplaced_objects[1:], misplaced_dbs[1]) + unlink_files(expected_shard_dbs) + unlink_files(misplaced_dbs) + + # run cleave - second (final) range is cleaved; move this range to + # CLEAVED state and update stats to simulate another replica having + # cleaved it and replicated its state + shard_ranges[1].update_state(ShardRange.CLEAVED) + shard_ranges[1].update_meta(2, 15) + broker.merge_shard_ranges(shard_ranges[1:2]) + with self._mock_sharder(sharder_conf) as sharder: + with mock.patch.object( + sharder, '_make_shard_range_fetcher', + return_value=lambda: iter(misplaced_ranges)): + self.assertTrue(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual(str(shard_ranges[1].upper), context.cursor) + self.assertEqual(6, context.cleave_to_row) + self.assertEqual(6, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, misplaced_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(2, len(updated_shard_ranges)) + + # second shard range should have updated object count, bytes used and + # meta_timestamp + self.assertEqual(dict(shard_ranges[1]), dict(updated_shard_ranges[1])) + self._check_objects(objects[3:], expected_shard_dbs[1]) + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self._check_objects(misplaced_objects[:1], misplaced_dbs[0]) + self.assertFalse(os.path.exists(misplaced_dbs[1])) + + def 
test_cleave_shard_shrinking(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + own_shard_range = ShardRange( + broker.path, next(self.ts_iter), 'here', 'where', + state=ShardRange.SHRINKING, epoch=next(self.ts_iter)) + broker.merge_shard_ranges([own_shard_range]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertFalse(broker.is_root_container()) # sanity check + + objects = [ + ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0), + ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0), + ] + for obj in objects: + broker.put_object(*obj) + acceptor_epoch = next(self.ts_iter) + acceptor = ShardRange('.shards_a/acceptor', Timestamp.now(), + 'here', 'yonder', '1000', '11111', + state=ShardRange.ACTIVE, epoch=acceptor_epoch) + db_hash = hash_path(acceptor.account, acceptor.container) + # NB expected cleave db includes acceptor epoch + expected_shard_db = os.path.join( + self.tempdir, 'sda', 'containers', '0', db_hash[-3:], db_hash, + '%s_%s.db' % (db_hash, acceptor_epoch.internal)) + + broker.merge_shard_ranges([acceptor]) + broker.set_sharding_state() + + # run cleave + with self._mock_sharder() as sharder: + self.assertTrue(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual(str(acceptor.upper), context.cursor) + self.assertEqual(2, context.cleave_to_row) + self.assertEqual(2, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_db, 0)]) + shard_broker = ContainerBroker(expected_shard_db) + # NB when cleaving a shard container to a larger acceptor namespace + # then expect the shard broker's own shard range to reflect that of the + # acceptor shard range rather than being set to CLEAVED. 
+ self.assertEqual( + ShardRange.ACTIVE, shard_broker.get_own_shard_range().state) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(1, len(updated_shard_ranges)) + self.assertEqual(dict(acceptor), dict(updated_shard_ranges[0])) + + # shard range should have unmodified acceptor, bytes used and + # meta_timestamp + self._check_objects(objects, expected_shard_db) + + def test_cleave_repeated(self): + # verify that if new objects are merged into retiring db after cleaving + # started then cleaving will repeat but only new objects are cleaved + # in the repeated cleaving pass + broker = self._make_broker() + objects = [ + ('obj%03d' % i, next(self.ts_iter), 1, 'text/plain', 'etag', 0, 0) + for i in range(10) + ] + new_objects = [ + (name, next(self.ts_iter), 1, 'text/plain', 'etag', 0, 0) + for name in ('alpha', 'zeta') + ] + for obj in objects: + broker.put_object(*obj) + broker._commit_puts() + broker.enable_sharding(Timestamp.now()) + shard_bounds = (('', 'obj004'), ('obj004', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + old_broker = broker.get_brokers()[0] + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + + calls = [] + key = ('name', 'created_at', 'size', 'content_type', 'etag', 'deleted') + + def mock_replicate_object(part, db, node_id): + # merge new objects between cleave of first and second shard ranges + if not calls: + old_broker.merge_items( + [dict(zip(key, obj)) for obj in new_objects]) + calls.append((part, db, node_id)) + return True, [True, True, True] + + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._replicate_object = mock_replicate_object + sharder._process_broker(broker, node, 99) + + # sanity check - the new objects merged into the old db + self.assertFalse(broker.get_objects()) + self.assertEqual(12, len(old_broker.get_objects())) + + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self.assertEqual([(0, expected_shard_dbs[0], 0), + (0, expected_shard_dbs[1], 0)], calls) + + # check shard ranges were updated to CLEAVED + updated_shard_ranges = broker.get_shard_ranges() + # 'alpha' was not in table when first shard was cleaved + shard_ranges[0].bytes_used = 5 + shard_ranges[0].object_count = 5 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(objects[:5], expected_shard_dbs[0]) + # 'zeta' was in table when second shard was cleaved + shard_ranges[1].bytes_used = 6 + shard_ranges[1].object_count = 6 + shard_ranges[1].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[1], updated_shard_ranges[1]) + self._check_objects(objects[5:] + new_objects[1:], + expected_shard_dbs[1]) + + context = CleavingContext.load(broker) + self.assertFalse(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(10, context.cleave_to_row) + self.assertEqual(12, context.max_row) # note that max row increased + lines = sharder.logger.get_lines_for_level('warning') + 
self.assertIn('Repeat cleaving required', lines[0]) + self.assertFalse(lines[1:]) + unlink_files(expected_shard_dbs) + + # repeat the cleaving - the newer objects get cleaved + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + # this time the sharding completed + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + + # shard ranges are now ACTIVE - stats not updated by cleaving + updated_shard_ranges = broker.get_shard_ranges() + shard_ranges[0].state = ShardRange.ACTIVE + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(new_objects[:1], expected_shard_dbs[0]) + # both new objects are included in repeat cleaving but no older objects + shard_ranges[1].state = ShardRange.ACTIVE + self._check_shard_range(shard_ranges[1], updated_shard_ranges[1]) + self._check_objects(new_objects[1:], expected_shard_dbs[1]) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + def test_cleave_multiple_storage_policies(self): + # verify that objects in all storage policies are cleaved + broker = self._make_broker() + # add objects in multiple policies + objects = [{'name': 'obj_%03d' % i, + 'created_at': Timestamp.now().normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': i % 2, + } for i in range(1, 8)] + # merge_items mutates items + broker.merge_items([dict(obj) for obj in objects]) + broker.enable_sharding(Timestamp.now()) + shard_ranges = self._make_shard_ranges( + (('', 'obj_004'), ('obj_004', '')), state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + # check shard ranges were updated to ACTIVE + self.assertEqual([ShardRange.ACTIVE] * 2, + [sr.state for sr in broker.get_shard_ranges()]) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + actual_objects = shard_broker.get_objects() + self.assertEqual(objects[:4], actual_objects) + + shard_broker = ContainerBroker(expected_shard_dbs[1]) + actual_objects = shard_broker.get_objects() + self.assertEqual(objects[4:], actual_objects) + + def test_cleave_insufficient_replication(self): + # verify that if replication of a cleaved shard range fails then rows + # are not merged again to the existing shard db + broker = self._make_broker() + retiring_db_id = broker.get_info()['id'] + objects = [ + {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter), + 'size': 1, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + for i in range(10) + ] + broker.merge_items([dict(obj) for obj in objects]) + broker._commit_puts() + broker.enable_sharding(Timestamp.now()) + shard_bounds = (('', 'obj004'), ('obj004', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, 
state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + new_object = {'name': 'alpha', 'created_at': next(self.ts_iter), + 'size': 0, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + broker.merge_items([dict(new_object)]) + + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + orig_merge_items = ContainerBroker.merge_items + + def mock_merge_items(broker, items): + merge_items_calls.append((broker.path, + # merge mutates item so make a copy + [dict(item) for item in items])) + orig_merge_items(broker, items) + + # first shard range cleaved but fails to replicate + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + return_value=(False, [False, False, True])) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + # first shard range cleaved to shard broker + self.assertEqual([(shard_ranges[0].name, objects[:5])], + merge_items_calls) + # replication of first shard range fails - no more shards attempted + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[0], 0) + # shard broker has sync points + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertEqual( + [{'remote_id': retiring_db_id, 'sync_point': len(objects)}], + shard_broker.get_syncs()) + self.assertEqual(objects[:5], shard_broker.get_objects()) + + # first shard range replicates ok, no new merges required, second is + # cleaved but fails to replicate + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items), self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, True, True]), + (False, [False, False, True])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + + broker_shard_ranges = broker.get_shard_ranges() + shard_ranges[0].object_count = 5 + shard_ranges[0].bytes_used = sum(obj['size'] for obj in objects[:5]) + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], broker_shard_ranges[0]) + # second shard range still in created state + self._assert_shard_ranges_equal([shard_ranges[1]], + [broker_shard_ranges[1]]) + # only second shard range rows were merged to shard db + self.assertEqual([(shard_ranges[1].name, objects[5:])], + merge_items_calls) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + # shard broker has sync points + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': retiring_db_id, 'sync_point': len(objects)}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:], 
shard_broker.get_objects()) + + # repeat - second shard range cleaves fully because its previously + # cleaved shard db no longer exists + unlink_files(expected_shard_dbs) + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(True, [True, True, True]), # misplaced obj + (False, [False, True, True])]) + sharder._audit_container = mock.MagicMock() + sharder.logger = debug_logger() + sharder._process_broker(broker, node, 99) + + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + + broker_shard_ranges = broker.get_shard_ranges() + shard_ranges[1].object_count = 5 + shard_ranges[1].bytes_used = sum(obj['size'] for obj in objects[5:]) + shard_ranges[1].state = ShardRange.ACTIVE + self._check_shard_range(shard_ranges[1], broker_shard_ranges[1]) + # second shard range rows were merged to shard db again + self.assertEqual([(shard_ranges[0].name, [new_object]), + (shard_ranges[1].name, objects[5:])], + merge_items_calls) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + # first shard broker was created by misplaced object - no sync point + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertFalse(shard_broker.get_syncs()) + self.assertEqual([new_object], shard_broker.get_objects()) + # second shard broker has sync points + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': retiring_db_id, 'sync_point': len(objects)}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:], shard_broker.get_objects()) + + def test_shard_replication_quorum_failures(self): + broker = self._make_broker() + objects = [ + {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter), + 'size': 1, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + for i in range(10) + ] + broker.merge_items([dict(obj) for obj in objects]) + broker._commit_puts() + shard_bounds = (('', 'obj002'), ('obj002', 'obj004'), + ('obj004', 'obj006'), ('obj006', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + with self._mock_sharder({'shard_replication_quorum': 3}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, True, True]), + (False, [False, False, True])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + # replication of first shard range fails - no more shards attempted + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[0], 0) + self.assertEqual([ShardRange.CREATED] * 4, + [sr.state for sr in broker.get_shard_ranges()]) + + # and again with a chilled out quorum, so cleaving moves onto second + # 
shard range which fails to reach even chilled quorum + with self._mock_sharder({'shard_replication_quorum': 1}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, False, True]), + (False, [False, False, False])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self.assertEqual(sharder._replicate_object.call_args_list, [ + mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0), + ]) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CREATED, ShardRange.CREATED, + ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + + # now pretend another node successfully cleaved the second shard range, + # but this node still fails to replicate so still cannot move on + shard_ranges[1].update_state(ShardRange.CLEAVED) + broker.merge_shard_ranges(shard_ranges[1]) + with self._mock_sharder({'shard_replication_quorum': 1}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, False, False])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED, + ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + + # until a super-chilled quorum is used - but even then there must have + # been an attempt to replicate + with self._mock_sharder( + {'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [])]) # maybe shard db was deleted + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED, + ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + + # next pass - the second shard replication is attempted and fails, but + # that's ok because another node has cleaved it and + # existing_shard_replication_quorum is zero + with self._mock_sharder( + {'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, False, False]), + (False, [False, True, False])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self.assertEqual(sharder._replicate_object.call_args_list, [ + mock.call(0, expected_shard_dbs[1], 0), + mock.call(0, expected_shard_dbs[2], 0), + ]) + self.assertEqual([ShardRange.CLEAVED] * 3 + [ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + self.assertEqual(1, sharder.shard_replication_quorum) + self.assertEqual(0, sharder.existing_shard_replication_quorum) + + # crazy replication quorums will be capped to replica_count + with self._mock_sharder( + 
{'shard_replication_quorum': 99, + 'existing_shard_replication_quorum': 99}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, True, True])]) + sharder._audit_container = mock.MagicMock() + sharder.logger = debug_logger() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[3], 0) + self.assertEqual([ShardRange.CLEAVED] * 3 + [ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + self.assertEqual(3, sharder.shard_replication_quorum) + self.assertEqual(3, sharder.existing_shard_replication_quorum) + + # ...and progress is still made if replication fully succeeds + with self._mock_sharder( + {'shard_replication_quorum': 99, + 'existing_shard_replication_quorum': 99}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(True, [True, True, True])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[3], 0) + self.assertEqual([ShardRange.ACTIVE] * 4, + [sr.state for sr in broker.get_shard_ranges()]) + warnings = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'shard_replication_quorum of 99 exceeds replica count', + warnings[0]) + self.assertIn( + 'existing_shard_replication_quorum of 99 exceeds replica count', + warnings[1]) + self.assertEqual(3, sharder.shard_replication_quorum) + self.assertEqual(3, sharder.existing_shard_replication_quorum) + + def test_cleave_to_existing_shard_db(self): + # verify that when cleaving to an already existing shard db, sync + # points prevent rows from being merged to the shard db again + def replicate(node, from_broker, part): + # short circuit replication + rpc = replicator.ContainerReplicatorRpc( + self.tempdir, DATADIR, ContainerBroker, mount_check=False) + + fake_repl_connection = attach_fake_replication_rpc(rpc) + with mock.patch('swift.common.db_replicator.ReplConnection', + fake_repl_connection): + with mock.patch('swift.common.db_replicator.ring.Ring', + lambda *args, **kwargs: FakeRing()): + daemon = replicator.ContainerReplicator({}) + info = from_broker.get_replication_info() + success = daemon._repl_to_node( + node, from_broker, part, info) + self.assertTrue(success) + + orig_merge_items = ContainerBroker.merge_items + + def mock_merge_items(broker, items): + # capture merge_items calls + merge_items_calls.append((broker.path, + # merge mutates item so make a copy + [dict(item) for item in items])) + orig_merge_items(broker, items) + + objects = [ + {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter), + 'size': 1, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + for i in range(10) + ] + # local db gets 4 objects + local_broker = self._make_broker() + local_broker.merge_items([dict(obj) for obj in objects[2:6]]) + local_broker._commit_puts() + local_retiring_db_id = local_broker.get_info()['id'] + + # remote db gets 5 objects + remote_broker = self._make_broker(device='sdb') + remote_broker.merge_items([dict(obj) for obj in objects[2:7]]) + remote_broker._commit_puts() + remote_retiring_db_id = remote_broker.get_info()['id'] + + local_node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda', + 'id': '2', 'index': 0, 'replication_ip': '1.2.3.4', + 
'replication_port': 6040} + remote_node = {'ip': '1.2.3.5', 'port': 6040, 'device': 'sdb', + 'id': '3', 'index': 1, 'replication_ip': '1.2.3.5', + 'replication_port': 6040} + + # remote db replicates to local, bringing local db's total to 5 objects + self.assertNotEqual(local_broker.get_objects(), + remote_broker.get_objects()) + replicate(local_node, remote_broker, 0) + self.assertEqual(local_broker.get_objects(), + remote_broker.get_objects()) + + # local db gets 2 new objects, bringing its total to 7 + local_broker.merge_items([dict(obj) for obj in objects[1:2]]) + local_broker.merge_items([dict(obj) for obj in objects[7:8]]) + + # local db gets shard ranges + own_shard_range = local_broker.get_own_shard_range() + now = Timestamp.now() + own_shard_range.update_state(ShardRange.SHARDING, state_timestamp=now) + own_shard_range.epoch = now + shard_ranges = self._make_shard_ranges( + (('', 'obj004'), ('obj004', '')), state=ShardRange.CREATED) + local_broker.merge_shard_ranges([own_shard_range] + shard_ranges) + self.assertTrue(local_broker.set_sharding_state()) + + # local db shards + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + return_value=(True, [True, True, True])) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(local_broker, local_node, 0) + + # all objects merged from local to shard ranges + self.assertEqual([(shard_ranges[0].name, objects[1:5]), + (shard_ranges[1].name, objects[5:8])], + merge_items_calls) + + # shard brokers have sync points + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 5}], + shard_broker.get_syncs()) + self.assertEqual(objects[1:5], shard_broker.get_objects()) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 5}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:8], shard_broker.get_objects()) + + # local db replicates to remote, so remote now has shard ranges + # note: no objects replicated because local is sharded + self.assertFalse(remote_broker.get_shard_ranges()) + replicate(remote_node, local_broker, 0) + self._assert_shard_ranges_equal(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + # remote db gets 3 new objects, bringing its total to 8 + remote_broker.merge_items([dict(obj) for obj in objects[:1]]) + remote_broker.merge_items([dict(obj) for obj in objects[8:]]) + + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + return_value=(True, [True, True, True])) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(remote_broker, remote_node, 0) + + # shard brokers have sync points for the remote db so only new objects + # are merged from remote broker to shard brokers + self.assertEqual([(shard_ranges[0].name, objects[:1]), + (shard_ranges[1].name, 
objects[8:])], + merge_items_calls) + # sync points are updated + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 8}], + shard_broker.get_syncs()) + self.assertEqual(objects[:5], shard_broker.get_objects()) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 8}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:], shard_broker.get_objects()) + + def _check_complete_sharding(self, account, container, shard_bounds): + broker = self._make_sharding_broker( + account=account, container=container, shard_bounds=shard_bounds) + obj = {'name': 'obj', 'created_at': next(self.ts_iter).internal, + 'size': 14, 'content_type': 'text/plain', 'etag': 'an etag', + 'deleted': 0} + broker.get_brokers()[0].merge_items([obj]) + self.assertEqual(2, len(broker.db_files)) # sanity check + + def check_not_complete(): + with self._mock_sharder() as sharder: + self.assertFalse(sharder._complete_sharding(broker)) + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'Repeat cleaving required for %r' % broker.db_files[0], + warning_lines[0]) + self.assertFalse(warning_lines[1:]) + sharder.logger.clear() + context = CleavingContext.load(broker) + self.assertFalse(context.cleaving_done) + self.assertFalse(context.misplaced_done) + self.assertEqual('', context.cursor) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + for shard_range in broker.get_shard_ranges(): + self.assertEqual(ShardRange.CLEAVED, shard_range.state) + self.assertEqual(SHARDING, broker.get_db_state()) + + # no cleave context progress + check_not_complete() + + # cleaving_done is False + context = CleavingContext.load(broker) + self.assertEqual(1, context.max_row) + context.cleave_to_row = 1 # pretend all rows have been cleaved + context.cleaving_done = False + context.misplaced_done = True + context.store(broker) + check_not_complete() + + # misplaced_done is False + context.misplaced_done = False + context.cleaving_done = True + context.store(broker) + check_not_complete() + + # modified db max row + old_broker = broker.get_brokers()[0] + obj = {'name': 'obj', 'created_at': next(self.ts_iter).internal, + 'size': 14, 'content_type': 'text/plain', 'etag': 'an etag', + 'deleted': 1} + old_broker.merge_items([obj]) + self.assertGreater(old_broker.get_max_row(), context.max_row) + context.misplaced_done = True + context.cleaving_done = True + context.store(broker) + check_not_complete() + + # db id changes + broker.get_brokers()[0].newid('fake_remote_id') + context.cleave_to_row = 2 # pretend all rows have been cleaved, again + context.store(broker) + check_not_complete() + + # context ok + context = CleavingContext.load(broker) + context.cleave_to_row = context.max_row + context.misplaced_done = True + context.cleaving_done = True + context.store(broker) + with self._mock_sharder() as sharder: + self.assertTrue(sharder._complete_sharding(broker)) + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + for shard_range in broker.get_shard_ranges(): + self.assertEqual(ShardRange.ACTIVE, shard_range.state) + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertFalse(warning_lines) + sharder.logger.clear() + return broker + + def 
test_complete_sharding_root(self): + broker = self._check_complete_sharding( + 'a', 'c', (('', 'mid'), ('mid', ''))) + self.assertEqual(0, broker.get_own_shard_range().deleted) + + def test_complete_sharding_shard(self): + broker = self._check_complete_sharding( + '.shards_', 'shard_c', (('l', 'mid'), ('mid', 'u'))) + self.assertEqual(1, broker.get_own_shard_range().deleted) + + def test_identify_sharding_candidate(self): + brokers = [self._make_broker(container='c%03d' % i) for i in range(6)] + for broker in brokers: + broker.set_sharding_sysmeta('Root', 'a/c') + node = {'index': 2} + # containers are all empty + with self._mock_sharder() as sharder: + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + expected_stats = {} + self._assert_stats(expected_stats, sharder, 'sharding_candidates') + + objects = [ + ['obj%3d' % i, next(self.ts_iter).internal, i, 'text/plain', + 'etag%s' % i, 0] for i in range(160)] + + # one container has 100 objects, which is below the sharding threshold + for obj in objects[:100]: + brokers[0].put_object(*obj) + conf = {'recon_cache_path': self.tempdir} + with self._mock_sharder(conf=conf) as sharder: + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + self.assertFalse(sharder.sharding_candidates) + expected_recon = { + 'found': 0, + 'top': []} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # reduce the sharding threshold and the container is reported + conf = {'shard_container_threshold': 100, + 'recon_cache_path': self.tempdir} + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now() as now: + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + stats_0 = {'path': brokers[0].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c000', + 'root': 'a/c', + 'object_count': 100, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[0].db_file).st_size} + self.assertEqual([stats_0], sharder.sharding_candidates) + expected_recon = { + 'found': 1, + 'top': [stats_0]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # repeat with handoff node and db_file error + with self._mock_sharder(conf=conf) as sharder: + with mock.patch('os.stat', side_effect=OSError('test error')): + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, {}) + stats_0_b = {'path': brokers[0].db_file, + 'node_index': None, + 'account': 'a', + 'container': 'c000', + 'root': 'a/c', + 'object_count': 100, + 'meta_timestamp': now.internal, + 'file_size': None} + self.assertEqual([stats_0_b], sharder.sharding_candidates) + self._assert_stats(expected_stats, sharder, 'sharding_candidates') + expected_recon = { + 'found': 1, + 'top': [stats_0_b]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # load up another container, but not to threshold for sharding, and + # verify it is never a candidate for sharding + for obj in objects[:50]: + brokers[2].put_object(*obj) + own_sr = brokers[2].get_own_shard_range() + for state in ShardRange.STATES: + own_sr.update_state(state, state_timestamp=Timestamp.now()) + brokers[2].merge_shard_ranges([own_sr]) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + with annotate_failure(state): + self.assertEqual([stats_0], 
sharder.sharding_candidates) + + # reduce the threshold and the second container is included + conf = {'shard_container_threshold': 50, + 'recon_cache_path': self.tempdir} + own_sr.update_state(ShardRange.ACTIVE, state_timestamp=Timestamp.now()) + brokers[2].merge_shard_ranges([own_sr]) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + stats_2 = {'path': brokers[2].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c002', + 'root': 'a/c', + 'object_count': 50, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[2].db_file).st_size} + self.assertEqual([stats_0, stats_2], sharder.sharding_candidates) + expected_recon = { + 'found': 2, + 'top': [stats_0, stats_2]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # a broker not in active state is not included + own_sr = brokers[0].get_own_shard_range() + for state in ShardRange.STATES: + if state == ShardRange.ACTIVE: + continue + own_sr.update_state(state, state_timestamp=Timestamp.now()) + brokers[0].merge_shard_ranges([own_sr]) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + with annotate_failure(state): + self.assertEqual([stats_2], sharder.sharding_candidates) + + own_sr.update_state(ShardRange.ACTIVE, state_timestamp=Timestamp.now()) + brokers[0].merge_shard_ranges([own_sr]) + + # load up a third container with 150 objects + for obj in objects[:150]: + brokers[5].put_object(*obj) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + stats_5 = {'path': brokers[5].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c005', + 'root': 'a/c', + 'object_count': 150, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[5].db_file).st_size} + self.assertEqual([stats_0, stats_2, stats_5], + sharder.sharding_candidates) + # note recon top list is sorted by size + expected_recon = { + 'found': 3, + 'top': [stats_5, stats_0, stats_2]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # restrict the number of reported candidates + conf = {'shard_container_threshold': 50, + 'recon_cache_path': self.tempdir, + 'recon_candidates_limit': 2} + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + self.assertEqual([stats_0, stats_2, stats_5], + sharder.sharding_candidates) + expected_recon = { + 'found': 3, + 'top': [stats_5, stats_0]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # unrestrict the number of reported candidates + conf = {'shard_container_threshold': 50, + 'recon_cache_path': self.tempdir, + 'recon_candidates_limit': -1} + for i, broker in enumerate([brokers[1]] + brokers[3:5]): + for obj in objects[:(151 + i)]: + broker.put_object(*obj) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + + stats_4 = {'path': brokers[4].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c004', + 'root': 'a/c', + 'object_count': 153, + 'meta_timestamp': now.internal, + 'file_size': 
os.stat(brokers[4].db_file).st_size} + stats_3 = {'path': brokers[3].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c003', + 'root': 'a/c', + 'object_count': 152, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[3].db_file).st_size} + stats_1 = {'path': brokers[1].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c001', + 'root': 'a/c', + 'object_count': 151, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[1].db_file).st_size} + + self.assertEqual( + [stats_0, stats_1, stats_2, stats_3, stats_4, stats_5], + sharder.sharding_candidates) + self._assert_stats(expected_stats, sharder, 'sharding_candidates') + expected_recon = { + 'found': 6, + 'top': [stats_4, stats_3, stats_1, stats_5, stats_0, stats_2]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + def test_misplaced_objects_root_container(self): + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + + objects = [ + # misplaced objects in second and third shard ranges + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 1], + ['where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0], + # deleted + ['x', self.ts_encoded(), 0, '', '', 1, 1], + ] + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', 'yonder'), + ('yonder', '')) + initial_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for shard_range in initial_shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(initial_shard_ranges) + + # unsharded + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # sharding - no misplaced objects + self.assertTrue(broker.set_sharding_state()) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # pretend we cleaved up to end of second shard range + context = CleavingContext.load(broker) + context.cursor = 'there' + context.store(broker) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # sharding - misplaced objects + for obj in objects: + broker.put_object(*obj) + # pretend we have not cleaved any ranges + context.cursor = '' + context.store(broker) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + 
self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + self.assertFalse(os.path.exists(expected_shard_dbs[3])) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + + # pretend we cleaved up to end of second shard range + context.cursor = 'there' + context.store(broker) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[1]) + # ... and removed from the source db + self._check_objects(objects[2:], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + self.assertFalse(os.path.exists(expected_shard_dbs[3])) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + + # pretend we cleaved up to end of fourth shard range + context.cursor = 'yonder' + context.store(broker) + # and some new misplaced updates arrived in the first shard range + new_objects = [ + ['b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0], + ['c', self.ts_encoded(), 20, 'text/plain', 'etag_c', 0, 0], + ] + for obj in new_objects: + broker.put_object(*obj) + + # check that *all* misplaced objects are moved despite exceeding + # the listing limit + with self._mock_sharder(conf={'cleave_row_batch_size': 2}) as sharder: + sharder._move_misplaced_objects(broker) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in expected_shard_dbs[2:4]], + any_order=True + ) + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check misplaced objects were moved + self._check_objects(new_objects, expected_shard_dbs[0]) + self._check_objects(objects[:2], expected_shard_dbs[1]) + self._check_objects(objects[2:3], expected_shard_dbs[2]) + self._check_objects(objects[3:], expected_shard_dbs[3]) + # ... 
and removed from the source db + self._check_objects([], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + + # pretend we cleaved all ranges - sharded state + self.assertTrue(broker.set_sharded_state()) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # and then more misplaced updates arrive + newer_objects = [ + ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0], + ['z', self.ts_encoded(), 52, 'text/plain', 'etag_z', 0, 0], + ] + for obj in newer_objects: + broker.put_object(*obj) + broker.get_info() # force updates to be committed + # sanity check the puts landed in sharded broker + self._check_objects(newer_objects, broker.db_file) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) + for db in (expected_shard_dbs[0], expected_shard_dbs[-1])], + any_order=True + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check new misplaced objects were moved + self._check_objects(newer_objects[:1] + new_objects, + expected_shard_dbs[0]) + self._check_objects(newer_objects[1:], expected_shard_dbs[4]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + # ... and other shard dbs were unchanged + self._check_objects(objects[:2], expected_shard_dbs[1]) + self._check_objects(objects[2:3], expected_shard_dbs[2]) + self._check_objects(objects[3:], expected_shard_dbs[3]) + + def _setup_misplaced_objects(self): + # make a broker with shard ranges, move it to sharded state and then + # put some misplaced objects in it + broker = self._make_broker() + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', 'yonder'), + ('yonder', '')) + initial_shard_ranges = [ + ShardRange('.shards_a/%s-%s' % (lower, upper), + Timestamp.now(), lower, upper, state=ShardRange.ACTIVE) + for lower, upper in shard_bounds + ] + expected_dbs = [] + for shard_range in initial_shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(initial_shard_ranges) + objects = [ + # misplaced objects in second, third and fourth shard ranges + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0], + ['where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0], + # deleted + ['x', self.ts_encoded(), 0, '', '', 1, 0], + ] + broker.enable_sharding(Timestamp.now()) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + for obj in objects: + broker.put_object(*obj) + self.assertEqual(SHARDED, broker.get_db_state()) + return broker, objects, expected_dbs + + def test_misplaced_objects_newer_objects(self): + # verify that objects merged to the db after misplaced objects have + # been identified are not removed from the db + broker, objects, 
expected_dbs = self._setup_misplaced_objects() + newer_objects = [ + ['j', self.ts_encoded(), 51, 'text/plain', 'etag_j', 0, 0], + ['k', self.ts_encoded(), 52, 'text/plain', 'etag_k', 1, 0], + ] + + calls = [] + pre_removal_objects = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + if db == expected_dbs[1]: + # put some new objects in the shard range that is being + # replicated before misplaced objects are removed from that + # range in the source db + for obj in newer_objects: + broker.put_object(*obj) + # grab a snapshot of the db contents - a side effect is + # that the newer objects are now committed to the db + pre_removal_objects.extend( + broker.get_objects()) + return True, [True, True, True] + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + # sanity check - the newer objects were in the db before the misplaced + # object were removed + for obj in newer_objects: + self.assertIn(obj[0], [o['name'] for o in pre_removal_objects]) + for obj in objects[:2]: + self.assertIn(obj[0], [o['name'] for o in pre_removal_objects]) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... but newer objects were not removed from the source db + self._check_objects(newer_objects, broker.db_file) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + # they will be moved on next cycle + unlink_files(expected_dbs) + with self._mock_sharder(replicas=3) as sharder: + sharder._move_misplaced_objects(broker) + + self._check_objects(newer_objects, expected_dbs[1]) + self._check_objects([], broker.db_file) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + def test_misplaced_objects_db_id_changed(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + pre_info = broker.get_info() + calls = [] + expected_retained_objects = [] + expected_retained_objects_dbs = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + if len(calls) == 2: + broker.newid('fake_remote_id') + # grab snapshot of the objects in the broker when it changed id + expected_retained_objects.extend( + self._get_raw_object_records(broker)) + if len(calls) >= 2: + expected_retained_objects_dbs.append(db) + return True, [True, True, True] + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + # sanity checks + self.assertNotEqual(pre_info['id'], broker.get_info()['id']) + self.assertTrue(expected_retained_objects) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... 
but objects were not removed after the source db id changed + self._check_objects(expected_retained_objects, broker.db_file) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Refused to remove misplaced objects', lines[0]) + self.assertIn('Refused to remove misplaced objects', lines[1]) + self.assertFalse(lines[2:]) + + # they will be moved again on next cycle + unlink_files(expected_dbs) + sharder.logger.clear() + with self._mock_sharder(replicas=3) as sharder: + sharder._move_misplaced_objects(broker) + + self.assertEqual(2, len(set(expected_retained_objects_dbs))) + for db in expected_retained_objects_dbs: + if db == expected_dbs[1]: + self._check_objects(objects[:2], expected_dbs[1]) + if db == expected_dbs[2]: + self._check_objects(objects[2:3], expected_dbs[2]) + if db == expected_dbs[3]: + self._check_objects(objects[3:], expected_dbs[3]) + self._check_objects([], broker.db_file) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': len(expected_retained_objects), + 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + def test_misplaced_objects_sufficient_replication(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object.return_value = (True, [True, True, True]) + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_dbs[2:4])], + any_order=True) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def test_misplaced_objects_insufficient_replication_3_replicas(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + returns = {expected_dbs[1]: (True, [True, True, True]), # ok + expected_dbs[2]: (False, [True, False, False]), # < quorum + expected_dbs[3]: (False, [False, True, True])} # ok + calls = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + return returns[db] + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved to shard dbs + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... 
but only removed from the source db if sufficiently replicated + self._check_objects(objects[2:3], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def test_misplaced_objects_insufficient_replication_2_replicas(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + returns = {expected_dbs[1]: (True, [True, True]), # ok + expected_dbs[2]: (False, [True, False]), # ok + expected_dbs[3]: (False, [False, False])} # < quorum + calls = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + return returns[db] + + with self._mock_sharder(replicas=2) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved to shard dbs + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... but only removed from the source db if sufficiently replicated + self._check_objects(objects[3:], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def test_misplaced_objects_insufficient_replication_4_replicas(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + returns = {expected_dbs[1]: (False, [True, False, False, False]), + expected_dbs[2]: (True, [True, False, False, True]), + expected_dbs[3]: (False, [False, False, False, False])} + calls = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + return returns[db] + + with self._mock_sharder(replicas=4) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved to shard dbs + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... but only removed from the source db if sufficiently replicated + self._check_objects(objects[:2] + objects[3:], broker.db_file) + # ... 
and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def _check_misplaced_objects_shard_container_unsharded(self, conf=None): + broker = self._make_broker(account='.shards_a', container='.shard_c') + ts_shard = next(self.ts_iter) + own_sr = ShardRange(broker.path, ts_shard, 'here', 'where') + broker.merge_shard_ranges([own_sr]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertEqual(own_sr, broker.get_own_shard_range()) # sanity check + self.assertEqual(UNSHARDED, broker.get_db_state()) + + objects = [ + # some of these are misplaced objects + ['b', self.ts_encoded(), 2, 'text/plain', 'etag_b', 0, 0], + ['here', self.ts_encoded(), 2, 'text/plain', 'etag_here', 0, 0], + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0], + ['x', self.ts_encoded(), 0, '', '', 1, 0], # deleted + ['y', self.ts_encoded(), 10, 'text/plain', 'etag_y', 0, 0], + ] + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', '')) + root_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for sr in root_shard_ranges: + db_hash = hash_path(sr.account, sr.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + # no objects + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_not_called() + + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # now put objects + for obj in objects: + broker.put_object(*obj) + self._check_objects(objects, broker.db_file) # sanity check + + # NB final shard range not available + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges[:-1]) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'}), + mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + sharder._replicate_object.assert_called_with( + 0, expected_shard_dbs[0], 0), + + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 1, 'placed': 2, 'unplaced': 2} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # some misplaced objects could not be moved... + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'Failed to find destination for at least 2 misplaced objects', + warning_lines[0]) + self.assertFalse(warning_lines[1:]) + sharder.logger.clear() + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[0]) + # ... and removed from the source db + self._check_objects(objects[2:], broker.db_file) + # ... 
and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + self.assertFalse(os.path.exists(expected_shard_dbs[3])) + + # repeat with final shard range available + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + + sharder._replicate_object.assert_called_with( + 0, expected_shard_dbs[-1], 0), + + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[0]) + self._check_objects(objects[4:], expected_shard_dbs[3]) + # ... and removed from the source db + self._check_objects(objects[2:4], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # repeat - no work remaining + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_not_called() + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # and then more misplaced updates arrive + new_objects = [ + ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0], + ['z', self.ts_encoded(), 52, 'text/plain', 'etag_z', 0, 0], + ] + for obj in new_objects: + broker.put_object(*obj) + # sanity check the puts landed in sharded broker + self._check_objects(new_objects[:1] + objects[2:4] + new_objects[1:], + broker.db_file) + + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'}), + mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) + for db in (expected_shard_dbs[0], expected_shard_dbs[3])], + any_order=True + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check new misplaced objects were moved + self._check_objects(new_objects[:1] + objects[:2], + expected_shard_dbs[0]) + self._check_objects(objects[4:] + new_objects[1:], + expected_shard_dbs[3]) + # ... 
and removed from the source db + self._check_objects(objects[2:4], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + def test_misplaced_objects_shard_container_unsharded(self): + self._check_misplaced_objects_shard_container_unsharded() + + def test_misplaced_objects_shard_container_unsharded_limit_two(self): + self._check_misplaced_objects_shard_container_unsharded( + conf={'cleave_row_batch_size': 2}) + + def test_misplaced_objects_shard_container_unsharded_limit_one(self): + self._check_misplaced_objects_shard_container_unsharded( + conf={'cleave_row_batch_size': 1}) + + def test_misplaced_objects_shard_container_sharding(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + ts_shard = next(self.ts_iter) + # note that own_sr spans two root shard ranges + own_sr = ShardRange(broker.path, ts_shard, 'here', 'where') + own_sr.update_state(ShardRange.SHARDING) + own_sr.epoch = next(self.ts_iter) + broker.merge_shard_ranges([own_sr]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertEqual(own_sr, broker.get_own_shard_range()) # sanity check + self.assertEqual(UNSHARDED, broker.get_db_state()) + + objects = [ + # some of these are misplaced objects + ['b', self.ts_encoded(), 2, 'text/plain', 'etag_b', 0, 0], + ['here', self.ts_encoded(), 2, 'text/plain', 'etag_here', 0, 0], + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0], + ['v', self.ts_encoded(), 10, 'text/plain', 'etag_v', 0, 0], + ['y', self.ts_encoded(), 10, 'text/plain', 'etag_y', 0, 0], + ] + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', '')) + root_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for sr in root_shard_ranges: + db_hash = hash_path(sr.account, sr.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + # pretend broker is sharding but not yet cleaved a shard + self.assertTrue(broker.set_sharding_state()) + broker.merge_shard_ranges([dict(sr) for sr in root_shard_ranges[1:3]]) + # then some updates arrive + for obj in objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(objects, broker.db_file) # sanity check + + # first destination is not available + with self._mock_sharder() as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges[1:]) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'}), + mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[-1], 0)], + ) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 1, 'placed': 1, 'unplaced': 2} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'Failed to find destination for at least 2 misplaced objects', + warning_lines[0]) + self.assertFalse(warning_lines[1:]) + sharder.logger.clear() + + # check some misplaced objects were moved + 
self._check_objects(objects[5:], expected_shard_dbs[3]) + # ... and removed from the source db + self._check_objects(objects[:5], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # normality resumes and all destinations are available + with self._mock_sharder() as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'})] + ) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0)], + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[0]) + self._check_objects(objects[5:], expected_shard_dbs[3]) + # ... and removed from the source db + self._check_objects(objects[2:5], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # pretend first shard has been cleaved + context = CleavingContext.load(broker) + context.cursor = 'there' + context.store(broker) + # and then more misplaced updates arrive + new_objects = [ + ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0], + # this one is in the now cleaved shard range... + ['k', self.ts_encoded(), 52, 'text/plain', 'etag_k', 0, 0], + ['z', self.ts_encoded(), 53, 'text/plain', 'etag_z', 0, 0], + ] + for obj in new_objects: + broker.put_object(*obj) + broker.get_info() # force updates to be committed + # sanity check the puts landed in sharded broker + self._check_objects(sorted(new_objects + objects[2:5]), broker.db_file) + with self._mock_sharder() as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, + params={'states': 'updating', 'marker': '', + 'end_marker': 'there\x00'}), + mock.call(broker, newest=True, + params={'states': 'updating', 'marker': 'where', + 'end_marker': ''})]) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1], + expected_shard_dbs[-1])], + any_order=True + ) + + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 5, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check *all* the misplaced objects were moved + self._check_objects(new_objects[:1] + objects[:2], + expected_shard_dbs[0]) + self._check_objects(new_objects[1:2] + objects[2:4], + expected_shard_dbs[1]) + self._check_objects(objects[5:] + new_objects[2:], + expected_shard_dbs[3]) + # ... 
and removed from the source db + self._check_objects(objects[4:5], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + def test_misplaced_objects_deleted_and_updated(self): + # setup + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + + shard_bounds = (('', 'here'), ('here', '')) + root_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for sr in root_shard_ranges: + db_hash = hash_path(sr.account, sr.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(root_shard_ranges) + self.assertTrue(broker.set_sharding_state()) + + ts_older_internal = self.ts_encoded() # used later + # put deleted objects into source + objects = [ + ['b', self.ts_encoded(), 0, '', '', 1, 0], + ['x', self.ts_encoded(), 0, '', '', 1, 0] + ] + for obj in objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(objects, broker.db_file) # sanity check + # pretend we cleaved all ranges - sharded state + self.assertTrue(broker.set_sharded_state()) + + with self._mock_sharder() as sharder: + sharder.logger = debug_logger() + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1])], + any_order=True + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check new misplaced objects were moved + self._check_objects(objects[:1], expected_shard_dbs[0]) + self._check_objects(objects[1:], expected_shard_dbs[1]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + + # update source db with older undeleted versions of same objects + old_objects = [ + ['b', ts_older_internal, 2, 'text/plain', 'etag_b', 0, 0], + ['x', ts_older_internal, 4, 'text/plain', 'etag_x', 0, 0] + ] + for obj in old_objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(old_objects, broker.db_file) # sanity check + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1])], + any_order=True + ) + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check older misplaced objects were not merged to shard brokers + self._check_objects(objects[:1], expected_shard_dbs[0]) + self._check_objects(objects[1:], expected_shard_dbs[1]) + # ... 
and removed from the source db + self._check_objects([], broker.db_file) + + # the destination shard dbs for misplaced objects may already exist so + # check they are updated correctly when overwriting objects + # update source db with newer deleted versions of same objects + new_objects = [ + ['b', self.ts_encoded(), 0, '', '', 1, 0], + ['x', self.ts_encoded(), 0, '', '', 1, 0] + ] + for obj in new_objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(new_objects, broker.db_file) # sanity check + shard_broker = ContainerBroker( + expected_shard_dbs[0], account=root_shard_ranges[0].account, + container=root_shard_ranges[0].container) + # update one shard container with even newer version of object + timestamps = [next(self.ts_iter) for i in range(7)] + ts_newer = encode_timestamps( + timestamps[1], timestamps[3], timestamps[5]) + newer_object = ('b', ts_newer, 10, 'text/plain', 'etag_b', 0, 0) + shard_broker.put_object(*newer_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1])], + any_order=True + ) + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check only the newer misplaced object was moved + self._check_objects([newer_object], expected_shard_dbs[0]) + self._check_objects(new_objects[1:], expected_shard_dbs[1]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + + # update source with a version of 'b' that has newer data + # but older content-type and metadata relative to shard object + ts_update = encode_timestamps( + timestamps[2], timestamps[3], timestamps[4]) + update_object = ('b', ts_update, 20, 'text/ignored', 'etag_newer', 0, + 0) + broker.put_object(*update_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + ts_expected = encode_timestamps( + timestamps[2], timestamps[3], timestamps[5]) + expected = ('b', ts_expected, 20, 'text/plain', 'etag_newer', 0, 0) + self._check_objects([expected], expected_shard_dbs[0]) + self._check_objects([], broker.db_file) + + # update source with a version of 'b' that has older data + # and content-type but newer metadata relative to shard object + ts_update = encode_timestamps( + timestamps[1], timestamps[3], timestamps[6]) + update_object = ('b', ts_update, 999, 'text/ignored', 'etag_b', 0, 0) + broker.put_object(*update_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + ts_expected = encode_timestamps( + timestamps[2], timestamps[3], timestamps[6]) + expected = ('b', ts_expected, 20, 'text/plain', 'etag_newer', 0, 0) + self._check_objects([expected], expected_shard_dbs[0]) + self._check_objects([], broker.db_file) + + # update source with a version of 'b' that has older data + # but newer content-type and metadata + ts_update = encode_timestamps( + timestamps[2], timestamps[6], timestamps[6]) + update_object = ('b', ts_update, 999, 'text/newer', 'etag_b', 0, 0) + broker.put_object(*update_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + ts_expected = encode_timestamps( + timestamps[2], timestamps[6], timestamps[6]) + expected = ('b', ts_expected, 20, 'text/newer', 'etag_newer', 0, 0) + self._check_objects([expected], expected_shard_dbs[0]) + self._check_objects([], broker.db_file) + + def 
_setup_find_ranges(self, account, cont, lower, upper): + broker = self._make_broker(account=account, container=cont) + own_sr = ShardRange('%s/%s' % (account, cont), Timestamp.now(), + lower, upper) + broker.merge_shard_ranges([own_sr]) + broker.set_sharding_sysmeta('Root', 'a/c') + objects = [ + # some of these are misplaced objects + ['obj%3d' % i, self.ts_encoded(), i, 'text/plain', 'etag%s' % i, 0] + for i in range(100)] + for obj in objects: + broker.put_object(*obj) + return broker, objects + + def _check_find_shard_ranges_none_found(self, broker, objects): + with self._mock_sharder() as sharder: + num_found = sharder._find_shard_ranges(broker) + self.assertGreater(sharder.split_size, len(objects)) + self.assertEqual(0, num_found) + self.assertFalse(broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + with self._mock_sharder( + conf={'shard_container_threshold': 200}) as sharder: + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(sharder.split_size, len(objects)) + self.assertEqual(0, num_found) + self.assertFalse(broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + def test_find_shard_ranges_none_found_root(self): + broker, objects = self._setup_find_ranges('a', 'c', '', '') + self._check_find_shard_ranges_none_found(broker, objects) + + def test_find_shard_ranges_none_found_shard(self): + broker, objects = self._setup_find_ranges( + '.shards_a', 'c', 'lower', 'upper') + self._check_find_shard_ranges_none_found(broker, objects) + + def _check_find_shard_ranges_finds_two(self, account, cont, lower, upper): + def check_ranges(): + self.assertEqual(2, len(broker.get_shard_ranges())) + expected_ranges = [ + ShardRange( + ShardRange.make_path('.int_shards_a', 'c', cont, now, 0), + now, lower, objects[98][0], 99), + ShardRange( + ShardRange.make_path('.int_shards_a', 'c', cont, now, 1), + now, objects[98][0], upper, 1), + ] + self._assert_shard_ranges_equal(expected_ranges, + broker.get_shard_ranges()) + + # first invocation finds both ranges + broker, objects = self._setup_find_ranges( + account, cont, lower, upper) + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'auto_create_account_prefix': '.int_'} + ) as sharder: + with mock_timestamp_now() as now: + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(99, sharder.split_size) + self.assertEqual(2, num_found) + check_ranges() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 2, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + # second invocation finds none + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'auto_create_account_prefix': '.int_'} + ) as sharder: + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(0, num_found) + self.assertEqual(2, len(broker.get_shard_ranges())) + check_ranges() + expected_stats = {'attempted': 0, 'success': 0, 'failure': 0, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = 
self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + def test_find_shard_ranges_finds_two_root(self): + self._check_find_shard_ranges_finds_two('a', 'c', '', '') + + def test_find_shard_ranges_finds_two_shard(self): + self._check_find_shard_ranges_finds_two('.shards_a', 'c_', 'l', 'u') + + def _check_find_shard_ranges_finds_three(self, account, cont, lower, + upper): + broker, objects = self._setup_find_ranges( + account, cont, lower, upper) + now = Timestamp.now() + expected_ranges = [ + ShardRange( + ShardRange.make_path('.shards_a', 'c', cont, now, 0), + now, lower, objects[44][0], 45), + ShardRange( + ShardRange.make_path('.shards_a', 'c', cont, now, 1), + now, objects[44][0], objects[89][0], 45), + ShardRange( + ShardRange.make_path('.shards_a', 'c', cont, now, 2), + now, objects[89][0], upper, 10), + ] + # first invocation finds 2 ranges + with self._mock_sharder( + conf={'shard_container_threshold': 90, + 'shard_scanner_batch_size': 2}) as sharder: + with mock_timestamp_now(now): + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(45, sharder.split_size) + self.assertEqual(2, num_found) + self.assertEqual(2, len(broker.get_shard_ranges())) + self._assert_shard_ranges_equal(expected_ranges[:2], + broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 2, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + # second invocation finds third shard range + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'shard_scanner_batch_size': 2} + ) as sharder: + with mock_timestamp_now(now): + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(1, num_found) + self.assertEqual(3, len(broker.get_shard_ranges())) + self._assert_shard_ranges_equal(expected_ranges, + broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + # third invocation finds none + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'shard_scanner_batch_size': 2} + ) as sharder: + sharder._send_shard_ranges = mock.MagicMock(return_value=True) + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(0, num_found) + self.assertEqual(3, len(broker.get_shard_ranges())) + self._assert_shard_ranges_equal(expected_ranges, + broker.get_shard_ranges()) + expected_stats = {'attempted': 0, 'success': 0, 'failure': 0, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + def test_find_shard_ranges_finds_three_root(self): + self._check_find_shard_ranges_finds_three('a', 'c', '', '') + + def test_find_shard_ranges_finds_three_shard(self): + self._check_find_shard_ranges_finds_three('.shards_a', 'c_', 'l', 'u') + + def test_sharding_enabled(self): + broker = self._make_broker() + self.assertFalse(sharding_enabled(broker)) + broker.update_metadata( + {'X-Container-Sysmeta-Sharding': + ('yes', Timestamp.now().internal)}) + self.assertTrue(sharding_enabled(broker)) + # deleting broker clears sharding sysmeta + broker.delete_db(Timestamp.now().internal) + 
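(The sharding_enabled() behaviour that test_sharding_enabled pins down here and just below can be summarised by roughly the following predicate; this is an illustrative sketch inferred from the assertions, with an invented name, not code from the patch.)

    from swift.common.utils import config_true_value

    def sharding_enabled_sketch(broker):
        # a truthy X-Container-Sysmeta-Sharding sysmeta value enables sharding ...
        sysmeta = broker.metadata.get('X-Container-Sysmeta-Sharding')
        if sysmeta and config_true_value(sysmeta[0]):
            return True
        # ... and so does the mere presence of shard ranges, which is why the
        # test still sees True after delete_db() has cleared the sysmeta
        return bool(broker.get_shard_ranges())
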
self.assertFalse(sharding_enabled(broker)) + # but if broker has a shard range then sharding is enabled + broker.merge_shard_ranges( + ShardRange('acc/a_shard', Timestamp.now(), 'l', 'u')) + self.assertTrue(sharding_enabled(broker)) + + def test_send_shard_ranges(self): + shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + + def do_test(replicas, *resp_codes): + sent_data = defaultdict(str) + + def on_send(fake_conn, data): + sent_data[fake_conn] += data + + with self._mock_sharder(replicas=replicas) as sharder: + with mocked_http_conn(*resp_codes, give_send=on_send) as conn: + with mock_timestamp_now() as now: + res = sharder._send_shard_ranges( + 'a', 'c', shard_ranges) + + self.assertEqual(sharder.ring.replica_count, len(conn.requests)) + expected_body = json.dumps([dict(sr) for sr in shard_ranges]) + expected_headers = {'Content-Type': 'application/json', + 'Content-Length': str(len(expected_body)), + 'X-Timestamp': now.internal, + 'X-Backend-Record-Type': 'shard', + 'User-Agent': mock.ANY} + for data in sent_data.values(): + self.assertEqual(expected_body, data) + hosts = set() + for req in conn.requests: + path_parts = req['path'].split('/')[1:] + hosts.add('%s:%s/%s' % (req['ip'], req['port'], path_parts[0])) + # FakeRing only has one partition + self.assertEqual('0', path_parts[1]) + self.assertEqual('PUT', req['method']) + self.assertEqual(['a', 'c'], path_parts[-2:]) + req_headers = req['headers'] + for k, v in expected_headers.items(): + self.assertEqual(v, req_headers[k]) + self.assertTrue( + req_headers['User-Agent'].startswith('container-sharder')) + self.assertEqual(sharder.ring.replica_count, len(hosts)) + return res, sharder + + replicas = 3 + res, sharder = do_test(replicas, 202, 202, 202) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 202, 404) + self.assertTrue(res) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 202, Exception) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, 202, 404, 404) + self.assertFalse(res) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 500, 500, 500) + self.assertFalse(res) + self.assertEqual([True, True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, Exception, Exception, 202) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, Exception, eventlet.Timeout(), 202) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + + replicas = 2 + res, sharder = do_test(replicas, 202, 202) + self.assertTrue(res) + 
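(For orientation, the request shape that do_test() above verifies for every container replica can be sketched as follows; build_shard_range_put is a hypothetical helper built only from the headers and body the assertions check, not the sharder's real method.)

    import json
    from swift.common.utils import Timestamp

    def build_shard_range_put(shard_ranges):
        # body and headers matching what test_send_shard_ranges expects the
        # sharder to PUT to each replica of the target container
        body = json.dumps([dict(sr) for sr in shard_ranges])
        headers = {'Content-Type': 'application/json',
                   'Content-Length': str(len(body)),
                   'X-Timestamp': Timestamp.now().internal,
                   'X-Backend-Record-Type': 'shard'}
        return headers, body
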
self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 404) + self.assertTrue(res) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, Exception) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, 404, 404) + self.assertFalse(res) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, Exception, Exception) + self.assertFalse(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, eventlet.Timeout(), Exception) + self.assertFalse(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + + replicas = 4 + res, sharder = do_test(replicas, 202, 202, 202, 202) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertTrue(res) + res, sharder = do_test(replicas, 202, 202, 404, 404) + self.assertTrue(res) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 202, Exception, Exception) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, 202, 404, 404, 404) + self.assertFalse(res) + self.assertEqual([True, True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 500, 500, 500, 202) + self.assertFalse(res) + self.assertEqual([True, True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, Exception, Exception, 202, 404) + self.assertFalse(res) + self.assertEqual([True], [ + all(msg in line for msg in ('Failed to put shard ranges', '404')) + for line in sharder.logger.get_lines_for_level('warning')]) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test( + replicas, eventlet.Timeout(), eventlet.Timeout(), 202, 404) + self.assertFalse(res) + self.assertEqual([True], [ + all(msg in line for msg in ('Failed to put shard ranges', '404')) + for line in sharder.logger.get_lines_for_level('warning')]) + self.assertEqual([True, True], [ + 
'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + + def test_process_broker_not_sharding_no_others(self): + # verify that sharding process will not start when own shard range is + # missing or in wrong state or there are no other shard ranges + broker = self._make_broker() + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + # sanity check + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + + # no own shard range + with self._mock_sharder() as sharder: + sharder._process_broker(broker, node, 99) + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + # now add own shard range + for state in sorted(ShardRange.STATES): + own_sr = broker.get_own_shard_range() # returns the default + own_sr.update_state(state) + broker.merge_shard_ranges([own_sr]) + with mock.patch.object( + broker, 'set_sharding_state') as mock_set_sharding_state: + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + with mock.patch.object(sharder, '_audit_container'): + sharder.logger = debug_logger() + sharder._process_broker(broker, node, 99) + own_shard_range = broker.get_own_shard_range( + no_default=True) + mock_set_sharding_state.assert_not_called() + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(own_shard_range)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + def _check_process_broker_sharding_no_others(self, state): + # verify that when existing own_shard_range has given state and there + # are other shard ranges then the sharding process will begin + broker = self._make_broker(hash_='hash%s' % state) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + own_sr = broker.get_own_shard_range() + self.assertTrue(own_sr.update_state(state)) + epoch = Timestamp.now() + own_sr.epoch = epoch + shard_ranges = self._make_shard_ranges((('', 'm'), ('m', ''))) + broker.merge_shard_ranges([own_sr] + shard_ranges) + + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_create_shard_containers', return_value=0): + with mock_timestamp_now() as now: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + final_own_sr = broker.get_own_shard_range(no_default=True) + + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(final_own_sr)) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1]) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + + def test_process_broker_sharding_with_own_shard_range_no_others(self): + self._check_process_broker_sharding_no_others(ShardRange.SHARDING) + self._check_process_broker_sharding_no_others(ShardRange.SHRINKING) + + def test_process_broker_not_sharding_others(self): + # verify that sharding process will not start when own shard range is + # missing or in wrong state even when other shard ranges are in the db + broker = self._make_broker() + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': 
'2', + 'index': 0} + # sanity check + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + + # add shard ranges - but not own + shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + broker.merge_shard_ranges(shard_ranges) + + with self._mock_sharder() as sharder: + sharder._process_broker(broker, node, 99) + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + # now add own shard range + for state in sorted(ShardRange.STATES): + if state in (ShardRange.SHARDING, + ShardRange.SHRINKING, + ShardRange.SHARDED): + epoch = None + else: + epoch = Timestamp.now() + + own_sr = broker.get_own_shard_range() # returns the default + own_sr.update_state(state) + own_sr.epoch = epoch + broker.merge_shard_ranges([own_sr]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._process_broker(broker, node, 99) + own_shard_range = broker.get_own_shard_range( + no_default=True) + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(own_shard_range)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + if epoch: + self.assertFalse(broker.logger.get_lines_for_level('warning')) + else: + self.assertIn('missing epoch', + broker.logger.get_lines_for_level('warning')[0]) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + def _check_process_broker_sharding_others(self, state): + # verify states in which own_shard_range will cause sharding + # process to start when other shard ranges are in the db + broker = self._make_broker(hash_='hash%s' % state) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + # add shard ranges - but not own + shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + broker.merge_shard_ranges(shard_ranges) + # sanity check + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + + # now set own shard range to given state and persist it + own_sr = broker.get_own_shard_range() # returns the default + self.assertTrue(own_sr.update_state(state)) + epoch = Timestamp.now() + own_sr.epoch = epoch + broker.merge_shard_ranges([own_sr]) + with self._mock_sharder() as sharder: + + sharder.logger = debug_logger() + with mock_timestamp_now() as now: + # we're not testing rest of the process here so prevent any + # attempt to progress shard range states + sharder._create_shard_containers = lambda *args: 0 + sharder._process_broker(broker, node, 99) + own_shard_range = broker.get_own_shard_range(no_default=True) + + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(own_shard_range)) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1]) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + + def test_process_broker_sharding_with_own_shard_range_and_others(self): + self._check_process_broker_sharding_others(ShardRange.SHARDING) + self._check_process_broker_sharding_others(ShardRange.SHRINKING) + self._check_process_broker_sharding_others(ShardRange.SHARDED) + + def check_shard_ranges_sent(self, broker, expected_sent): + bodies = [] + + def capture_send(conn, data): + bodies.append(data) + 
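(The _update_root_container tests that follow expect the shard ranges reported to the root to be assembled roughly as in this sketch; ranges_to_report is an invented name and the stats-refresh step is assumed, not shown here.)

    def ranges_to_report(broker):
        # what the update-root tests expect to be sent upward: the container's
        # own shard range (assumed to carry freshly updated usage stats) plus
        # every other shard range, deleted ones included, ordered by
        # (upper, state, lower) exactly as asserted below
        own = broker.get_own_shard_range()
        others = broker.get_shard_ranges(include_deleted=True)
        return sorted([own] + others,
                      key=lambda sr: (sr.upper, sr.state, sr.lower))
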
+ with self._mock_sharder() as sharder: + with mocked_http_conn(204, 204, 204, + give_send=capture_send) as mock_conn: + sharder._update_root_container(broker) + + for req in mock_conn.requests: + self.assertEqual('PUT', req['method']) + self.assertEqual([expected_sent] * 3, + [json.loads(b) for b in bodies]) + + def test_update_root_container_own_range(self): + broker = self._make_broker() + + # nothing to send + with self._mock_sharder() as sharder: + with mocked_http_conn() as mock_conn: + sharder._update_root_container(broker) + self.assertFalse(mock_conn.requests) + + def check_only_own_shard_range_sent(state): + own_shard_range = broker.get_own_shard_range() + self.assertTrue(own_shard_range.update_state( + state, state_timestamp=next(self.ts_iter))) + broker.merge_shard_ranges([own_shard_range]) + # add an object, expect to see it reflected in the own shard range + # that is sent + broker.put_object(str(own_shard_range.object_count + 1), + next(self.ts_iter).internal, 1, '', '') + with mock_timestamp_now() as now: + # force own shard range meta updates to be at fixed timestamp + expected_sent = [ + dict(own_shard_range, + meta_timestamp=now.internal, + object_count=own_shard_range.object_count + 1, + bytes_used=own_shard_range.bytes_used + 1)] + self.check_shard_ranges_sent(broker, expected_sent) + + for state in ShardRange.STATES: + with annotate_failure(state): + check_only_own_shard_range_sent(state) + + def test_update_root_container_all_ranges(self): + broker = self._make_broker() + other_shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + self.assertTrue(other_shard_ranges[0].set_deleted()) + broker.merge_shard_ranges(other_shard_ranges) + + # own range missing - send nothing + with self._mock_sharder() as sharder: + with mocked_http_conn() as mock_conn: + sharder._update_root_container(broker) + self.assertFalse(mock_conn.requests) + + def check_all_shard_ranges_sent(state): + own_shard_range = broker.get_own_shard_range() + self.assertTrue(own_shard_range.update_state( + state, state_timestamp=next(self.ts_iter))) + broker.merge_shard_ranges([own_shard_range]) + # add an object, expect to see it reflected in the own shard range + # that is sent + broker.put_object(str(own_shard_range.object_count + 1), + next(self.ts_iter).internal, 1, '', '') + with mock_timestamp_now() as now: + shard_ranges = broker.get_shard_ranges(include_deleted=True) + expected_sent = sorted([ + own_shard_range.copy( + meta_timestamp=now.internal, + object_count=own_shard_range.object_count + 1, + bytes_used=own_shard_range.bytes_used + 1)] + + shard_ranges, + key=lambda sr: (sr.upper, sr.state, sr.lower)) + self.check_shard_ranges_sent( + broker, [dict(sr) for sr in expected_sent]) + + for state in ShardRange.STATES.keys(): + with annotate_failure(state): + check_all_shard_ranges_sent(state) + + def test_audit_root_container(self): + broker = self._make_broker() + + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0} + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + self._assert_stats(expected_stats, sharder, 'audit_root') + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + mocked.assert_not_called() + + def assert_overlap_warning(line, state_text): + self.assertIn( + 'Audit failed for root %s' % broker.db_file, line) + self.assertIn( + 'overlapping ranges in state %s: k-t s-z' % state_text, + line) 
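(The two failure modes these root-audit tests provoke, overlapping ranges and missing namespace coverage, can be illustrated with a toy check over plain (lower, upper) bound strings like the bounds used here; audit_bounds is invented for illustration and is not the audit's real code.)

    def audit_bounds(bounds):
        # toy detection of overlaps and gaps across a sorted list of plain
        # (lower, upper) string bounds, where '' marks the namespace ends
        overlapping, missing = [], []
        prev_upper = ''                      # namespace begins at ''
        for lower, upper in sorted(bounds):
            if prev_upper and lower < prev_upper:
                overlapping.append((lower, upper))
            elif lower > prev_upper:
                missing.append((prev_upper, lower))
            prev_upper = max(prev_upper, upper)
        if prev_upper != '':                 # namespace must reach the end
            missing.append((prev_upper, ''))
        return overlapping, missing

For the bounds ('a', 'j'), ('k', 't'), ('s', 'z') used in these tests it reports ('s', 'z') as overlapping and ('', 'a'), ('j', 'k'), ('z', '') as missing, matching the warning text asserted here.
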
+ + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1} + shard_bounds = (('a', 'j'), ('k', 't'), ('s', 'z')) + for state, state_text in ShardRange.STATES.items(): + shard_ranges = self._make_shard_ranges(shard_bounds, state) + broker.merge_shard_ranges(shard_ranges) + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + lines = sharder.logger.get_lines_for_level('warning') + assert_overlap_warning(lines[0], state_text) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_root') + mocked.assert_not_called() + + def assert_missing_warning(line): + self.assertIn( + 'Audit failed for root %s' % broker.db_file, line) + self.assertIn('missing range(s): -a j-k z-', line) + + own_shard_range = broker.get_own_shard_range() + states = (ShardRange.SHARDING, ShardRange.SHARDED) + for state in states: + own_shard_range.update_state( + state, state_timestamp=next(self.ts_iter)) + broker.merge_shard_ranges([own_shard_range]) + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + lines = sharder.logger.get_lines_for_level('warning') + assert_missing_warning(lines[0]) + assert_overlap_warning(lines[0], state_text) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_root') + mocked.assert_not_called() + + def test_audit_shard_container(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + broker.set_sharding_sysmeta('Root', 'a/c') + # include overlaps to verify correct match for updating own shard range + shard_bounds = ( + ('a', 'j'), ('k', 't'), ('k', 's'), ('l', 's'), ('s', 'z')) + shard_ranges = self._make_shard_ranges(shard_bounds, ShardRange.ACTIVE) + shard_ranges[1].name = broker.path + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1} + + def call_audit_container(exc=None): + with self._mock_sharder() as sharder: + sharder.logger = debug_logger() + with mock.patch.object(sharder, '_audit_root_container') \ + as mocked, mock.patch.object( + sharder, 'int_client') as mock_swift: + mock_response = mock.MagicMock() + mock_response.headers = {'x-backend-record-type': + 'shard'} + mock_response.body = json.dumps( + [dict(sr) for sr in shard_ranges]) + mock_swift.make_request.return_value = mock_response + mock_swift.make_request.side_effect = exc + mock_swift.make_path = (lambda a, c: + '/v1/%s/%s' % (a, c)) + sharder.reclaim_age = 0 + sharder._audit_container(broker) + mocked.assert_not_called() + return sharder, mock_swift + + # bad account name + broker.account = 'bad_account' + sharder, mock_swift = call_audit_container() + lines = sharder.logger.get_lines_for_level('warning') + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[0]) + self.assertIn('account not in shards namespace', lines[0]) + self.assertNotIn('root has no matching shard range', lines[0]) + self.assertNotIn('unable to get shard ranges from root', lines[0]) + self.assertIn('Audit failed for shard %s' % broker.db_file, lines[1]) + self.assertIn('missing own shard range', lines[1]) + self.assertFalse(lines[2:]) + self.assertFalse(broker.is_deleted()) + + # missing own shard range + broker.get_info() + sharder, mock_swift = 
call_audit_container() + lines = sharder.logger.get_lines_for_level('warning') + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertIn('Audit failed for shard %s' % broker.db_file, lines[0]) + self.assertIn('missing own shard range', lines[0]) + self.assertNotIn('unable to get shard ranges from root', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertFalse(broker.is_deleted()) + + # create own shard range, no match in root + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0} + own_shard_range = broker.get_own_shard_range() # get the default + own_shard_range.lower = 'j' + own_shard_range.upper = 'k' + broker.merge_shard_ranges([own_shard_range]) + sharder, mock_swift = call_audit_container() + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[0]) + self.assertNotIn('account not in shards namespace', lines[0]) + self.assertNotIn('missing own shard range', lines[0]) + self.assertIn('root has no matching shard range', lines[0]) + self.assertNotIn('unable to get shard ranges from root', lines[0]) + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertFalse(broker.is_deleted()) + expected_headers = {'X-Backend-Record-Type': 'shard', + 'X-Newest': 'true', + 'X-Backend-Include-Deleted': 'True', + 'X-Backend-Override-Deleted': 'true'} + params = {'format': 'json', 'marker': 'j', 'end_marker': 'k'} + mock_swift.make_request.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + # create own shard range, failed response from root + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0} + own_shard_range = broker.get_own_shard_range() # get the default + own_shard_range.lower = 'j' + own_shard_range.upper = 'k' + broker.merge_shard_ranges([own_shard_range]) + sharder, mock_swift = call_audit_container( + exc=internal_client.UnexpectedResponse('bad', 'resp')) + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Failed to get shard ranges', lines[0]) + self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[1]) + self.assertNotIn('account not in shards namespace', lines[1]) + self.assertNotIn('missing own shard range', lines[1]) + self.assertNotIn('root has no matching shard range', lines[1]) + self.assertIn('unable to get shard ranges from root', lines[1]) + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertFalse(lines[2:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertFalse(broker.is_deleted()) + mock_swift.make_request.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + def assert_ok(): + sharder, mock_swift = call_audit_container() + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_shard') + params = {'format': 'json', 'marker': 'k', 'end_marker': 't'} + mock_swift.make_request.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + # make own shard range match one in root, but different state + shard_ranges[1].timestamp = Timestamp.now() + broker.merge_shard_ranges([shard_ranges[1]]) + now = Timestamp.now() + 
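(The root GET that these shard-audit assertions mock out is parameterised by the shard's own range; root_query_for is a hypothetical helper that just restates the headers and params the mock expects, not the audit's implementation.)

    def root_query_for(own_shard_range):
        # headers and params the shard audit is expected to send when asking
        # the root container for the shard's matching range
        headers = {'X-Backend-Record-Type': 'shard',
                   'X-Newest': 'true',
                   'X-Backend-Include-Deleted': 'True',
                   'X-Backend-Override-Deleted': 'true'}
        params = {'format': 'json',
                  'marker': str(own_shard_range.lower),
                  'end_marker': str(own_shard_range.upper)}
        return headers, params
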
shard_ranges[1].update_state(ShardRange.SHARDING, state_timestamp=now) + assert_ok() + self.assertFalse(broker.is_deleted()) + # own shard range state is updated from root version + own_shard_range = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_shard_range.state) + self.assertEqual(now, own_shard_range.state_timestamp) + + own_shard_range.update_state(ShardRange.SHARDED, + state_timestamp=Timestamp.now()) + broker.merge_shard_ranges([own_shard_range]) + assert_ok() + + own_shard_range.deleted = 1 + own_shard_range.timestamp = Timestamp.now() + broker.merge_shard_ranges([own_shard_range]) + assert_ok() + self.assertTrue(broker.is_deleted()) + + def test_find_and_enable_sharding_candidates(self): + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + shard_bounds = (('', 'here'), ('here', 'there'), ('there', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CLEAVED) + shard_ranges[0].state = ShardRange.ACTIVE + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + with self._mock_sharder() as sharder: + sharder._find_and_enable_sharding_candidates(broker) + + # one range just below threshold + shard_ranges[0].update_meta(sharder.shard_container_threshold - 1, 0) + broker.merge_shard_ranges(shard_ranges[0]) + with self._mock_sharder() as sharder: + sharder._find_and_enable_sharding_candidates(broker) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + + # two ranges above threshold, only one ACTIVE + shard_ranges[0].update_meta(sharder.shard_container_threshold, 0) + shard_ranges[2].update_meta(sharder.shard_container_threshold + 1, 0) + broker.merge_shard_ranges([shard_ranges[0], shard_ranges[2]]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + expected = shard_ranges[0].copy(state=ShardRange.SHARDING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal([expected] + shard_ranges[1:], + broker.get_shard_ranges()) + + # check idempotency + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + self._assert_shard_ranges_equal([expected] + shard_ranges[1:], + broker.get_shard_ranges()) + + # two ranges above threshold, both ACTIVE + shard_ranges[2].update_state(ShardRange.ACTIVE) + broker.merge_shard_ranges(shard_ranges[2]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + expected_2 = shard_ranges[2].copy(state=ShardRange.SHARDING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal( + [expected, shard_ranges[1], expected_2], broker.get_shard_ranges()) + + # check idempotency + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + self._assert_shard_ranges_equal( + [expected, shard_ranges[1], expected_2], broker.get_shard_ranges()) + + def test_find_and_enable_sharding_candidates_bootstrap(self): + broker = self._make_broker() + with self._mock_sharder( + conf={'shard_container_threshold': 1}) as sharder: + sharder._find_and_enable_sharding_candidates(broker) + self.assertEqual(ShardRange.ACTIVE, broker.get_own_shard_range().state) + broker.put_object('obj', next(self.ts_iter).internal, 1, '', '') + self.assertEqual(1, broker.get_info()['object_count']) + with 
self._mock_sharder( + conf={'shard_container_threshold': 1}) as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates( + broker, [broker.get_own_shard_range()]) + own_sr = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_sr.state) + self.assertEqual(now, own_sr.state_timestamp) + self.assertEqual(now, own_sr.epoch) + + # check idempotency + with self._mock_sharder( + conf={'shard_container_threshold': 1}) as sharder: + with mock_timestamp_now(): + sharder._find_and_enable_sharding_candidates( + broker, [broker.get_own_shard_range()]) + own_sr = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_sr.state) + self.assertEqual(now, own_sr.state_timestamp) + self.assertEqual(now, own_sr.epoch) + + def test_find_and_enable_shrinking_candidates(self): + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + shard_bounds = (('', 'here'), ('here', 'there'), ('there', '')) + size = (DEFAULT_SHARD_SHRINK_POINT * + DEFAULT_SHARD_CONTAINER_THRESHOLD / 100) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE, object_count=size) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + with self._mock_sharder() as sharder: + sharder._find_and_enable_shrinking_candidates(broker) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + + # one range just below threshold + shard_ranges[0].update_meta(size - 1, 0) + broker.merge_shard_ranges(shard_ranges[0]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + acceptor = shard_ranges[1].copy(lower=shard_ranges[0].lower) + acceptor.timestamp = now + donor = shard_ranges[0].copy(state=ShardRange.SHRINKING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]], + broker.get_shard_ranges()) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(acceptor.account, acceptor.container, [acceptor]), + mock.call(donor.account, donor.container, [donor, acceptor])] + ) + + # check idempotency + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]], + broker.get_shard_ranges()) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(acceptor.account, acceptor.container, [acceptor]), + mock.call(donor.account, donor.container, [donor, acceptor])] + ) + + # acceptor falls below threshold - not a candidate + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + acceptor.update_meta(0, 0, meta_timestamp=now) + broker.merge_shard_ranges(acceptor) + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]], + broker.get_shard_ranges()) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(acceptor.account, acceptor.container, [acceptor]), + mock.call(donor.account, donor.container, [donor, acceptor])] + ) + + # ...until donor has shrunk + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + donor.update_state(ShardRange.SHARDED, state_timestamp=now) + donor.set_deleted(timestamp=now) + 
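(The donor/acceptor handover these shrinking-candidate assertions describe can be sketched as below; shrink_pair is an invented helper mirroring the copies made in the expectations, not the sharder's actual code.)

    from swift.common.utils import ShardRange

    def shrink_pair(donor, acceptor, now):
        # the acceptor grows to cover the donor's namespace and the donor is
        # flipped to SHRINKING with a fresh epoch; both are then pushed out
        # via _send_shard_ranges, as the calls asserted here show
        new_acceptor = acceptor.copy(lower=donor.lower)
        new_acceptor.timestamp = now
        new_donor = donor.copy(state=ShardRange.SHRINKING,
                               state_timestamp=now, epoch=now)
        return new_donor, new_acceptor
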
broker.merge_shard_ranges(donor) + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + new_acceptor = shard_ranges[2].copy(lower=acceptor.lower) + new_acceptor.timestamp = now + new_donor = acceptor.copy(state=ShardRange.SHRINKING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal( + [donor, new_donor, new_acceptor], + broker.get_shard_ranges(include_deleted=True)) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(new_acceptor.account, new_acceptor.container, + [new_acceptor]), + mock.call(new_donor.account, new_donor.container, + [new_donor, new_acceptor])] + ) + + # ..finally last shard shrinks to root + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + new_donor.update_state(ShardRange.SHARDED, state_timestamp=now) + new_donor.set_deleted(timestamp=now) + new_acceptor.update_meta(0, 0, meta_timestamp=now) + broker.merge_shard_ranges([new_donor, new_acceptor]) + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + final_donor = new_acceptor.copy(state=ShardRange.SHRINKING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal( + [donor, new_donor, final_donor], + broker.get_shard_ranges(include_deleted=True)) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(final_donor.account, final_donor.container, + [final_donor, broker.get_own_shard_range()])] + ) + + def test_partition_and_device_filters(self): + # verify partitions and devices kwargs result in filtering of processed + # containers but not of the local device ids. + ring = FakeRing() + dev_ids = set() + container_data = [] + for dev in ring.devs: + dev_ids.add(dev['id']) + part = str(dev['id']) + broker = self._make_broker( + container='c%s' % dev['id'], hash_='c%shash' % dev['id'], + device=dev['device'], part=part) + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('true', next(self.ts_iter).internal)}) + container_data.append((broker.path, dev['id'], part)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once() + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set(container_data), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once(partitions='0') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set([container_data[0]]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once(partitions='2,0') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set([container_data[0], container_data[2]]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + 
sharder.run_once(partitions='2,0', devices='sdc') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set([container_data[2]]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once(devices='sdb,sdc') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set(container_data[1:]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + +class TestCleavingContext(BaseTestSharder): + def test_init(self): + ctx = CleavingContext(ref='test') + self.assertEqual('test', ctx.ref) + self.assertEqual('', ctx.cursor) + self.assertIsNone(ctx.max_row) + self.assertIsNone(ctx.cleave_to_row) + self.assertIsNone(ctx.last_cleave_to_row) + self.assertFalse(ctx.misplaced_done) + self.assertFalse(ctx.cleaving_done) + + def test_iter(self): + ctx = CleavingContext('test', 'curs', 12, 11, 10, False, True, 0, 4) + expected = {'ref': 'test', + 'cursor': 'curs', + 'max_row': 12, + 'cleave_to_row': 11, + 'last_cleave_to_row': 10, + 'cleaving_done': False, + 'misplaced_done': True, + 'ranges_done': 0, + 'ranges_todo': 4} + self.assertEqual(expected, dict(ctx)) + + def test_cursor(self): + broker = self._make_broker() + ref = CleavingContext._make_ref(broker) + + for curs in ('curs', u'curs\u00e4\u00fb'): + with annotate_failure('%r' % curs): + ctx = CleavingContext(ref, curs, 12, 11, 10, False, True) + self.assertEqual(curs.encode('utf8'), ctx.cursor) + ctx.store(broker) + ctx = CleavingContext.load(broker) + self.assertEqual(curs.encode('utf8'), ctx.cursor) + + def test_load(self): + broker = self._make_broker() + for i in range(6): + broker.put_object('o%s' % i, next(self.ts_iter).internal, 10, + 'text/plain', 'etag_a', 0) + + db_id = broker.get_info()['id'] + params = {'ref': db_id, + 'cursor': 'curs', + 'max_row': 2, + 'cleave_to_row': 2, + 'last_cleave_to_row': 1, + 'cleaving_done': False, + 'misplaced_done': True, + 'ranges_done': 2, + 'ranges_todo': 4} + key = 'X-Container-Sysmeta-Shard-Context-%s' % db_id + broker.update_metadata( + {key: (json.dumps(params), Timestamp.now().internal)}) + ctx = CleavingContext.load(broker) + self.assertEqual(db_id, ctx.ref) + self.assertEqual('curs', ctx.cursor) + # note max_row is dynamically updated during load + self.assertEqual(6, ctx.max_row) + self.assertEqual(2, ctx.cleave_to_row) + self.assertEqual(1, ctx.last_cleave_to_row) + self.assertTrue(ctx.misplaced_done) + self.assertFalse(ctx.cleaving_done) + self.assertEqual(2, ctx.ranges_done) + self.assertEqual(4, ctx.ranges_todo) + + def test_store(self): + broker = self._make_sharding_broker() + old_db_id = broker.get_brokers()[0].get_info()['id'] + ctx = CleavingContext(old_db_id, 'curs', 12, 11, 2, True, True, 2, 4) + ctx.store(broker) + key = 'X-Container-Sysmeta-Shard-Context-%s' % old_db_id + data = json.loads(broker.metadata[key][0]) + expected = {'ref': old_db_id, + 'cursor': 'curs', + 'max_row': 12, + 'cleave_to_row': 11, + 'last_cleave_to_row': 2, + 'cleaving_done': True, + 'misplaced_done': True, + 'ranges_done': 2, + 'ranges_todo': 4} + self.assertEqual(expected, data) + + def test_store_add_row_load(self): + # adding row to older db changes only max_row in the context + broker = self._make_sharding_broker() + old_broker = broker.get_brokers()[0] + 
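(The persistence layout that test_load and test_store assert can be summarised by this sketch; store_context_sketch is an invented name, but the sysmeta key and JSON payload are exactly what the tests read and write.)

    import json
    from swift.common.utils import Timestamp

    def store_context_sketch(broker, params):
        # the cleaving context is kept as container sysmeta, keyed by the
        # retiring database's id, with the context fields serialised as JSON
        key = 'X-Container-Sysmeta-Shard-Context-%s' % params['ref']
        broker.update_metadata(
            {key: (json.dumps(params), Timestamp.now().internal)})
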
old_db_id = old_broker.get_info()['id'] + old_broker.merge_items([old_broker._record_to_dict( + ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))]) + old_max_row = old_broker.get_max_row() + self.assertEqual(1, old_max_row) # sanity check + ctx = CleavingContext(old_db_id, 'curs', 1, 1, 0, True, True) + ctx.store(broker) + + # adding a row changes max row + old_broker.merge_items([old_broker._record_to_dict( + ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))]) + + new_ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, new_ctx.ref) + self.assertEqual('curs', new_ctx.cursor) + self.assertEqual(2, new_ctx.max_row) + self.assertEqual(1, new_ctx.cleave_to_row) + self.assertEqual(0, new_ctx.last_cleave_to_row) + self.assertTrue(new_ctx.misplaced_done) + self.assertTrue(new_ctx.cleaving_done) + + def test_store_reclaim_load(self): + # reclaiming rows from older db does not change context + broker = self._make_sharding_broker() + old_broker = broker.get_brokers()[0] + old_db_id = old_broker.get_info()['id'] + old_broker.merge_items([old_broker._record_to_dict( + ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))]) + old_max_row = old_broker.get_max_row() + self.assertEqual(1, old_max_row) # sanity check + ctx = CleavingContext(old_db_id, 'curs', 1, 1, 0, True, True) + ctx.store(broker) + + self.assertEqual( + 1, len(old_broker.get_objects())) + now = next(self.ts_iter).internal + broker.get_brokers()[0].reclaim(now, now) + self.assertFalse(old_broker.get_objects()) + + new_ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, new_ctx.ref) + self.assertEqual('curs', new_ctx.cursor) + self.assertEqual(1, new_ctx.max_row) + self.assertEqual(1, new_ctx.cleave_to_row) + self.assertEqual(0, new_ctx.last_cleave_to_row) + self.assertTrue(new_ctx.misplaced_done) + self.assertTrue(new_ctx.cleaving_done) + + def test_store_modify_db_id_load(self): + # changing id changes ref, so results in a fresh context + broker = self._make_sharding_broker() + old_broker = broker.get_brokers()[0] + old_db_id = old_broker.get_info()['id'] + ctx = CleavingContext(old_db_id, 'curs', 12, 11, 2, True, True) + ctx.store(broker) + + old_broker.newid('fake_remote_id') + new_db_id = old_broker.get_info()['id'] + self.assertNotEqual(old_db_id, new_db_id) + + new_ctx = CleavingContext.load(broker) + self.assertEqual(new_db_id, new_ctx.ref) + self.assertEqual('', new_ctx.cursor) + # note max_row is dynamically updated during load + self.assertEqual(-1, new_ctx.max_row) + self.assertEqual(None, new_ctx.cleave_to_row) + self.assertEqual(None, new_ctx.last_cleave_to_row) + self.assertFalse(new_ctx.misplaced_done) + self.assertFalse(new_ctx.cleaving_done) + + def test_load_modify_store_load(self): + broker = self._make_sharding_broker() + old_db_id = broker.get_brokers()[0].get_info()['id'] + ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, ctx.ref) + self.assertEqual('', ctx.cursor) # sanity check + ctx.cursor = 'curs' + ctx.misplaced_done = True + ctx.store(broker) + ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, ctx.ref) + self.assertEqual('curs', ctx.cursor) + self.assertTrue(ctx.misplaced_done) + + def test_reset(self): + ctx = CleavingContext('test', 'curs', 12, 11, 2, True, True) + + def check_context(): + self.assertEqual('test', ctx.ref) + self.assertEqual('', ctx.cursor) + self.assertEqual(12, ctx.max_row) + self.assertEqual(11, ctx.cleave_to_row) + self.assertEqual(11, ctx.last_cleave_to_row) + 
self.assertFalse(ctx.misplaced_done) + self.assertFalse(ctx.cleaving_done) + self.assertEqual(0, ctx.ranges_done) + self.assertEqual(0, ctx.ranges_todo) + ctx.reset() + # check idempotency + ctx.reset() + + def test_start(self): + ctx = CleavingContext('test', 'curs', 12, 11, 2, True, True) + + def check_context(): + self.assertEqual('test', ctx.ref) + self.assertEqual('', ctx.cursor) + self.assertEqual(12, ctx.max_row) + self.assertEqual(12, ctx.cleave_to_row) + self.assertEqual(2, ctx.last_cleave_to_row) + self.assertTrue(ctx.misplaced_done) # *not* reset here + self.assertFalse(ctx.cleaving_done) + self.assertEqual(0, ctx.ranges_done) + self.assertEqual(0, ctx.ranges_todo) + ctx.start() + # check idempotency + ctx.start() diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 0571a80724..7a77603f4c 100644 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -1053,7 +1053,7 @@ class TestObjectController(unittest.TestCase): mock_ring = mock.MagicMock() mock_ring.get_nodes.return_value = (99, [node]) object_updater.container_ring = mock_ring - mock_update.return_value = ((True, 1)) + mock_update.return_value = ((True, 1, None)) object_updater.run_once() self.assertEqual(1, mock_update.call_count) self.assertEqual((node, 99, 'PUT', '/a/c/o'), @@ -1061,6 +1061,7 @@ class TestObjectController(unittest.TestCase): actual_headers = mock_update.call_args_list[0][0][4] # User-Agent is updated. expected_post_headers['User-Agent'] = 'object-updater %s' % os.getpid() + expected_post_headers['X-Backend-Accept-Redirect'] = 'true' self.assertDictEqual(expected_post_headers, actual_headers) self.assertFalse( os.listdir(os.path.join( @@ -1073,6 +1074,104 @@ class TestObjectController(unittest.TestCase): self._test_PUT_then_POST_async_pendings( POLICIES[1], update_etag='override_etag') + def _check_PUT_redirected_async_pending(self, container_path=None): + # When container update is redirected verify that the redirect location + # is persisted in the async pending file. 
+ policy = POLICIES[0] + device_dir = os.path.join(self.testdir, 'sda1') + t_put = next(self.ts) + update_etag = '098f6bcd4621d373cade4e832627b4f6' + + put_headers = { + 'X-Trans-Id': 'put_trans_id', + 'X-Timestamp': t_put.internal, + 'Content-Type': 'application/octet-stream;swift_bytes=123456789', + 'Content-Length': '4', + 'X-Backend-Storage-Policy-Index': int(policy), + 'X-Container-Host': 'chost:3200', + 'X-Container-Partition': '99', + 'X-Container-Device': 'cdevice'} + + if container_path: + # the proxy may include this header + put_headers['X-Backend-Container-Path'] = container_path + expected_update_path = '/cdevice/99/%s/o' % container_path + else: + expected_update_path = '/cdevice/99/a/c/o' + + if policy.policy_type == EC_POLICY: + put_headers.update({ + 'X-Object-Sysmeta-Ec-Frag-Index': '2', + 'X-Backend-Container-Update-Override-Etag': update_etag, + 'X-Object-Sysmeta-Ec-Etag': update_etag}) + + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'PUT'}, + headers=put_headers, body='test') + resp_headers = {'Location': '/.sharded_a/c_shard_1/o', + 'X-Backend-Redirect-Timestamp': next(self.ts).internal} + + with mocked_http_conn(301, headers=[resp_headers]) as conn, \ + mock.patch('swift.common.utils.HASH_PATH_PREFIX', ''),\ + fake_spawn(): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 201) + self.assertEqual(1, len(conn.requests)) + + self.assertEqual(expected_update_path, conn.requests[0]['path']) + + # whether or not an X-Backend-Container-Path was received from the + # proxy, the async pending file should now have the container_path + # equal to the Location header received in the update response. + async_pending_file_put = os.path.join( + device_dir, diskfile.get_async_dir(policy), 'a83', + '06fbf0b514e5199dfc4e00f42eb5ea83-%s' % t_put.internal) + self.assertTrue(os.path.isfile(async_pending_file_put), + 'Expected %s to be a file but it is not.' 
+ % async_pending_file_put) + expected_put_headers = { + 'Referer': 'PUT http://localhost/sda1/p/a/c/o', + 'X-Trans-Id': 'put_trans_id', + 'X-Timestamp': t_put.internal, + 'X-Content-Type': 'application/octet-stream;swift_bytes=123456789', + 'X-Size': '4', + 'X-Etag': '098f6bcd4621d373cade4e832627b4f6', + 'User-Agent': 'object-server %s' % os.getpid(), + 'X-Backend-Storage-Policy-Index': '%d' % int(policy)} + if policy.policy_type == EC_POLICY: + expected_put_headers['X-Etag'] = update_etag + self.assertEqual( + {'headers': expected_put_headers, + 'account': 'a', 'container': 'c', 'obj': 'o', 'op': 'PUT', + 'container_path': '.sharded_a/c_shard_1'}, + pickle.load(open(async_pending_file_put))) + + # when updater is run its first request will be to the redirect + # location that is persisted in the async pending file + with mocked_http_conn(201) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache', + lambda *args: None): + object_updater = updater.ObjectUpdater( + {'devices': self.testdir, + 'mount_check': 'false'}, logger=debug_logger()) + node = {'id': 1, 'ip': 'chost', 'port': 3200, + 'device': 'cdevice'} + mock_ring = mock.MagicMock() + mock_ring.get_nodes.return_value = (99, [node]) + object_updater.container_ring = mock_ring + object_updater.run_once() + + self.assertEqual(1, len(conn.requests)) + self.assertEqual('/cdevice/99/.sharded_a/c_shard_1/o', + conn.requests[0]['path']) + + def test_PUT_redirected_async_pending(self): + self._check_PUT_redirected_async_pending() + + def test_PUT_redirected_async_pending_with_container_path(self): + self._check_PUT_redirected_async_pending(container_path='.another/c') + def test_POST_quarantine_zbyte(self): timestamp = normalize_timestamp(time()) req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, @@ -5263,6 +5362,95 @@ class TestObjectController(unittest.TestCase): 'X-Backend-Container-Update-Override-Content-Type': 'ignored', 'X-Backend-Container-Update-Override-Foo': 'ignored'}) + def test_PUT_container_update_to_shard(self): + # verify that alternate container update path is respected when + # included in request headers + def do_test(container_path, expected_path, expected_container_path): + policy = random.choice(list(POLICIES)) + container_updates = [] + + def capture_updates( + ip, port, method, path, headers, *args, **kwargs): + container_updates.append((ip, port, method, path, headers)) + + pickle_async_update_args = [] + + def fake_pickle_async_update(*args): + pickle_async_update_args.append(args) + + diskfile_mgr = self.object_controller._diskfile_router[policy] + diskfile_mgr.pickle_async_update = fake_pickle_async_update + + ts_put = next(self.ts) + headers = { + 'X-Timestamp': ts_put.internal, + 'X-Trans-Id': '123', + 'X-Container-Host': 'chost:cport', + 'X-Container-Partition': 'cpartition', + 'X-Container-Device': 'cdevice', + 'Content-Type': 'text/plain', + 'X-Object-Sysmeta-Ec-Frag-Index': 0, + 'X-Backend-Storage-Policy-Index': int(policy), + } + if container_path is not None: + headers['X-Backend-Container-Path'] = container_path + + req = Request.blank('/sda1/0/a/c/o', method='PUT', + headers=headers, body='') + with mocked_http_conn( + 500, give_connect=capture_updates) as fake_conn: + with fake_spawn(): + resp = req.get_response(self.object_controller) + self.assertRaises(StopIteration, fake_conn.code_iter.next) + self.assertEqual(resp.status_int, 201) + self.assertEqual(len(container_updates), 1) + # verify expected path used in update request + ip, port, method, path, headers = 
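The redirect handling exercised here hinges on turning a 301 response's Location header (for example /.sharded_a/c_shard_1/o) into an account/container path that is persisted as container_path in the async pending data. A rough sketch of that translation with a made-up helper name; the test for a garbage Location such as 'bad bad bad' shows the real updater treats it as a failed update and logs an 'Invalid path' error:

def location_to_container_path(location):
    # '/.sharded_a/c_shard_1/o' -> '.sharded_a/c_shard_1'
    # Return None when the Location does not look like /account/container/obj.
    segments = location.lstrip('/').split('/', 2)
    if len(segments) != 3 or not all(segments):
        return None
    return '/'.join(segments[:2])


assert location_to_container_path('/.sharded_a/c_shard_1/o') == \
    '.sharded_a/c_shard_1'
assert location_to_container_path('bad bad bad') is None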
container_updates[0] + self.assertEqual(ip, 'chost') + self.assertEqual(port, 'cport') + self.assertEqual(method, 'PUT') + self.assertEqual(path, '/cdevice/cpartition/%s/o' % expected_path) + + # verify that the picked update *always* has root container + self.assertEqual(1, len(pickle_async_update_args)) + (objdevice, account, container, obj, data, timestamp, + policy) = pickle_async_update_args[0] + self.assertEqual(objdevice, 'sda1') + self.assertEqual(account, 'a') # NB user account + self.assertEqual(container, 'c') # NB root container + self.assertEqual(obj, 'o') + self.assertEqual(timestamp, ts_put.internal) + self.assertEqual(policy, policy) + expected_data = { + 'headers': HeaderKeyDict({ + 'X-Size': '0', + 'User-Agent': 'object-server %s' % os.getpid(), + 'X-Content-Type': 'text/plain', + 'X-Timestamp': ts_put.internal, + 'X-Trans-Id': '123', + 'Referer': 'PUT http://localhost/sda1/0/a/c/o', + 'X-Backend-Storage-Policy-Index': int(policy), + 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'}), + 'obj': 'o', + 'account': 'a', + 'container': 'c', + 'op': 'PUT'} + if expected_container_path: + expected_data['container_path'] = expected_container_path + self.assertEqual(expected_data, data) + + do_test('a_shard/c_shard', 'a_shard/c_shard', 'a_shard/c_shard') + do_test('', 'a/c', None) + do_test(None, 'a/c', None) + # TODO: should these cases trigger a 400 response rather than + # defaulting to root path? + do_test('garbage', 'a/c', None) + do_test('/', 'a/c', None) + do_test('/no-acct', 'a/c', None) + do_test('no-cont/', 'a/c', None) + do_test('too/many/parts', 'a/c', None) + do_test('/leading/slash', 'a/c', None) + def test_container_update_async(self): policy = random.choice(list(POLICIES)) req = Request.blank( @@ -5335,23 +5523,21 @@ class TestObjectController(unittest.TestCase): 'X-Container-Partition': '20', 'X-Container-Host': '1.2.3.4:5', 'X-Container-Device': 'sdb1'}) - with mock.patch.object(object_server, 'spawn', - local_fake_spawn): - with mock.patch.object(self.object_controller, - 'async_update', - local_fake_async_update): - resp = req.get_response(self.object_controller) - # check the response is completed and successful - self.assertEqual(resp.status_int, 201) - # check that async_update hasn't been called - self.assertFalse(len(called_async_update_args)) - # now do the work in greenthreads - for func, a, kw in saved_spawn_calls: - gt = spawn(func, *a, **kw) - greenthreads.append(gt) - # wait for the greenthreads to finish - for gt in greenthreads: - gt.wait() + with mock.patch.object(object_server, 'spawn', local_fake_spawn), \ + mock.patch.object(self.object_controller, 'async_update', + local_fake_async_update): + resp = req.get_response(self.object_controller) + # check the response is completed and successful + self.assertEqual(resp.status_int, 201) + # check that async_update hasn't been called + self.assertFalse(len(called_async_update_args)) + # now do the work in greenthreads + for func, a, kw in saved_spawn_calls: + gt = spawn(func, *a, **kw) + greenthreads.append(gt) + # wait for the greenthreads to finish + for gt in greenthreads: + gt.wait() # check that the calls to async_update have happened headers_out = {'X-Size': '0', 'X-Content-Type': 'application/burrito', @@ -5362,7 +5548,8 @@ class TestObjectController(unittest.TestCase): 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'} expected = [('PUT', 'a', 'c', 'o', '1.2.3.4:5', '20', 'sdb1', headers_out, 'sda1', POLICIES[0]), - {'logger_thread_locals': (None, None)}] + {'logger_thread_locals': (None, None), + 
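The do_test cases above pin down how a supplied X-Backend-Container-Path is validated: it is honoured only when it is exactly two non-empty segments, and anything else quietly falls back to the root account/container (the TODO notes that a 400 might be preferable). A small sketch of that rule, with an invented function name:

def resolve_update_path(root_account, root_container, container_path=None):
    # Accept the override only if it looks like exactly 'account/container';
    # otherwise fall back to the root path, as the test cases above expect.
    if container_path:
        parts = container_path.split('/')
        if len(parts) == 2 and all(parts):
            return tuple(parts)
    return root_account, root_container


assert resolve_update_path('a', 'c', 'a_shard/c_shard') == \
    ('a_shard', 'c_shard')
for bad in ('', None, 'garbage', '/', '/no-acct', 'no-cont/',
            'too/many/parts', '/leading/slash'):
    assert resolve_update_path('a', 'c', bad) == ('a', 'c')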
'container_path': None}] self.assertEqual(called_async_update_args, [expected]) def test_container_update_as_greenthread_with_timeout(self): diff --git a/test/unit/obj/test_updater.py b/test/unit/obj/test_updater.py index aac6325254..ae51153b8e 100644 --- a/test/unit/obj/test_updater.py +++ b/test/unit/obj/test_updater.py @@ -65,7 +65,9 @@ class TestObjectUpdater(unittest.TestCase): {'id': 1, 'ip': '127.0.0.1', 'port': 1, 'device': 'sda1', 'zone': 2}, {'id': 2, 'ip': '127.0.0.1', 'port': 1, - 'device': 'sda1', 'zone': 4}], 30), + 'device': 'sda1', 'zone': 4}, + {'id': 3, 'ip': '127.0.0.1', 'port': 1, + 'device': 'sda1', 'zone': 6}], 30), f) self.devices_dir = os.path.join(self.testdir, 'devices') os.mkdir(self.devices_dir) @@ -74,6 +76,7 @@ class TestObjectUpdater(unittest.TestCase): for policy in POLICIES: os.mkdir(os.path.join(self.sda1, get_tmp_dir(policy))) self.logger = debug_logger() + self.ts_iter = make_timestamp_iter() def tearDown(self): rmtree(self.testdir, ignore_errors=1) @@ -299,19 +302,22 @@ class TestObjectUpdater(unittest.TestCase): self.assertIn("sweep progress", info_lines[1]) # the space ensures it's a positive number self.assertIn( - "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 error", + "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 errors, " + "0 redirects", info_lines[1]) self.assertIn(self.sda1, info_lines[1]) self.assertIn("sweep progress", info_lines[2]) self.assertIn( - "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 error", + "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 errors, " + "0 redirects", info_lines[2]) self.assertIn(self.sda1, info_lines[2]) self.assertIn("sweep complete", info_lines[3]) self.assertIn( - "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 error", + "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 errors, " + "0 redirects", info_lines[3]) self.assertIn(self.sda1, info_lines[3]) @@ -547,6 +553,26 @@ class TestObjectUpdater(unittest.TestCase): {'successes': 1, 'unlinks': 1, 'async_pendings': 1}) + def _write_async_update(self, dfmanager, timestamp, policy, + headers=None, container_path=None): + # write an async + account, container, obj = 'a', 'c', 'o' + op = 'PUT' + headers_out = headers or { + 'x-size': 0, + 'x-content-type': 'text/plain', + 'x-etag': 'd41d8cd98f00b204e9800998ecf8427e', + 'x-timestamp': timestamp.internal, + 'X-Backend-Storage-Policy-Index': int(policy), + 'User-Agent': 'object-server %s' % os.getpid() + } + data = {'op': op, 'account': account, 'container': container, + 'obj': obj, 'headers': headers_out} + if container_path: + data['container_path'] = container_path + dfmanager.pickle_async_update(self.sda1, account, container, obj, + data, timestamp, policy) + def test_obj_put_async_updates(self): ts_iter = make_timestamp_iter() policies = list(POLICIES) @@ -562,16 +588,12 @@ class TestObjectUpdater(unittest.TestCase): async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) os.mkdir(async_dir) - def do_test(headers_out, expected): + def do_test(headers_out, expected, container_path=None): # write an async dfmanager = DiskFileManager(conf, daemon.logger) - account, container, obj = 'a', 'c', 'o' - op = 'PUT' - data = {'op': op, 'account': account, 'container': container, - 'obj': obj, 'headers': headers_out} - dfmanager.pickle_async_update(self.sda1, account, container, obj, - data, next(ts_iter), policies[0]) - + self._write_async_update(dfmanager, next(ts_iter), policies[0], + headers=headers_out, + container_path=container_path) request_log = [] def capture(*args, 
**kwargs): @@ -613,11 +635,21 @@ class TestObjectUpdater(unittest.TestCase): 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e', 'X-Timestamp': ts.normal, 'X-Backend-Storage-Policy-Index': str(int(policies[0])), - 'User-Agent': 'object-updater %s' % os.getpid() + 'User-Agent': 'object-updater %s' % os.getpid(), + 'X-Backend-Accept-Redirect': 'true', } + # always expect X-Backend-Accept-Redirect to be true + do_test(headers_out, expected, container_path='.shards_a/shard_c') do_test(headers_out, expected) + # ...unless X-Backend-Accept-Redirect is already set + expected['X-Backend-Accept-Redirect'] = 'false' + headers_out_2 = dict(headers_out) + headers_out_2['X-Backend-Accept-Redirect'] = 'false' + do_test(headers_out_2, expected) + # updater should add policy header if missing + expected['X-Backend-Accept-Redirect'] = 'true' headers_out['X-Backend-Storage-Policy-Index'] = None do_test(headers_out, expected) @@ -632,6 +664,414 @@ class TestObjectUpdater(unittest.TestCase): 'X-Backend-Storage-Policy-Index') do_test(headers_out, expected) + def _check_update_requests(self, requests, timestamp, policy): + # do some sanity checks on update request + expected_headers = { + 'X-Size': '0', + 'X-Content-Type': 'text/plain', + 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e', + 'X-Timestamp': timestamp.internal, + 'X-Backend-Storage-Policy-Index': str(int(policy)), + 'User-Agent': 'object-updater %s' % os.getpid(), + 'X-Backend-Accept-Redirect': 'true'} + for request in requests: + self.assertEqual('PUT', request['method']) + self.assertDictEqual(expected_headers, request['headers']) + + def test_obj_put_async_root_update_redirected(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + + # run once + ts_redirect_1 = next(self.ts_iter) + ts_redirect_2 = next(self.ts_iter) + fake_responses = [ + # first round of update attempts, newest redirect should be chosen + (200, {}), + (301, {'Location': '/.shards_a/c_shard_new/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_2.internal}), + (301, {'Location': '/.shards_a/c_shard_old/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}), + # second round of update attempts + (200, {}), + (200, {}), + (200, {}), + ] + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests[:3], ts_obj, policies[0]) + self._check_update_requests(conn.requests[3:], ts_obj, policies[0]) + self.assertEqual(['/sda1/0/a/c/o'] * 3 + + ['/sda1/0/.shards_a/c_shard_new/o'] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 1, 'successes': 1, + 'unlinks': 1, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + + def test_obj_put_async_root_update_redirected_previous_success(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = 
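test_obj_put_async_root_update_redirected below relies on the updater picking, from a batch of container responses, the redirect whose X-Backend-Redirect-Timestamp is newest. A hedged sketch of that selection using plain floats for the timestamp comparison (Swift's Timestamp objects compare in the same order):

def pick_newest_redirect(responses):
    # responses: (status, headers) pairs from the container replicas.
    # Return the Location of the 301 with the newest redirect timestamp,
    # or None if nothing redirected.
    redirects = [(float(headers['X-Backend-Redirect-Timestamp']),
                  headers['Location'])
                 for status, headers in responses
                 if status == 301]
    return max(redirects)[1] if redirects else None


responses = [
    (200, {}),
    (301, {'Location': '/.shards_a/c_shard_new/o',
           'X-Backend-Redirect-Timestamp': '0000000002.00000'}),
    (301, {'Location': '/.shards_a/c_shard_old/o',
           'X-Backend-Redirect-Timestamp': '0000000001.00000'}),
]
assert pick_newest_redirect(responses) == '/.shards_a/c_shard_new/o'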
object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + # run once + with mocked_http_conn( + 507, 200, 507) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual(['/sda1/0/a/c/o'] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'failures': 1, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(dict(orig_async_data, successes=[1]), async_data) + + # run again - expect 3 redirected updates despite previous success + ts_redirect = next(self.ts_iter) + resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 2 + [(200, {})] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests[:2], ts_obj, policies[0]) + self._check_update_requests(conn.requests[2:], ts_obj, policies[0]) + root_part = daemon.container_ring.get_part('a/c') + shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1') + self.assertEqual( + ['/sda1/%s/a/c/o' % root_part] * 2 + + ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 1, 'successes': 1, 'failures': 1, 'unlinks': 1, + 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + + def _check_async_file(self, async_dir): + async_subdirs = os.listdir(async_dir) + self.assertEqual([mock.ANY], async_subdirs) + async_files = os.listdir(os.path.join(async_dir, async_subdirs[0])) + self.assertEqual([mock.ANY], async_files) + async_path = os.path.join( + async_dir, async_subdirs[0], async_files[0]) + with open(async_path) as fd: + async_data = pickle.load(fd) + return async_path, async_data + + def _check_obj_put_async_update_bad_redirect_headers(self, headers): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + fake_responses = [ + (301, headers), + (301, headers), + (301, headers), + ] + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual(['/sda1/0/a/c/o'] * 3, + [req['path'] for req in conn.requests]) + 
self.assertEqual( + {'failures': 1, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # async file still intact + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual(orig_async_data, async_data) + return daemon + + def test_obj_put_async_root_update_missing_location_header(self): + headers = { + 'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal} + self._check_obj_put_async_update_bad_redirect_headers(headers) + + def test_obj_put_async_root_update_bad_location_header(self): + headers = { + 'Location': 'bad bad bad', + 'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal} + daemon = self._check_obj_put_async_update_bad_redirect_headers(headers) + error_lines = daemon.logger.get_lines_for_level('error') + self.assertIn('Container update failed', error_lines[0]) + self.assertIn('Invalid path: bad%20bad%20bad', error_lines[0]) + + def test_obj_put_async_shard_update_redirected_twice(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0], + container_path='.shards_a/c_shard_older') + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + # run once + ts_redirect_1 = next(self.ts_iter) + ts_redirect_2 = next(self.ts_iter) + ts_redirect_3 = next(self.ts_iter) + fake_responses = [ + # 1st round of redirects, newest redirect should be chosen + (301, {'Location': '/.shards_a/c_shard_old/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}), + (301, {'Location': '/.shards_a/c_shard_new/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_2.internal}), + (301, {'Location': '/.shards_a/c_shard_old/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}), + # 2nd round of redirects + (301, {'Location': '/.shards_a/c_shard_newer/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}), + (301, {'Location': '/.shards_a/c_shard_newer/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}), + (301, {'Location': '/.shards_a/c_shard_newer/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}), + ] + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + # only *one* set of redirected requests is attempted per cycle + older_part = daemon.container_ring.get_part('.shards_a/c_shard_older') + new_part = daemon.container_ring.get_part('.shards_a/c_shard_new') + newer_part = daemon.container_ring.get_part('.shards_a/c_shard_newer') + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_older/o' % older_part] * 3 + + ['/sda1/%s/.shards_a/c_shard_new/o' % new_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 2, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file with most recent redirect + # response Location header value added to data + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, 
async_path) + self.assertEqual( + dict(orig_async_data, container_path='.shards_a/c_shard_newer', + redirect_history=['.shards_a/c_shard_new', + '.shards_a/c_shard_newer']), + async_data) + + # next cycle, should get latest redirect from pickled async update + fake_responses = [(200, {})] * 3 + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_newer/o' % newer_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 2, 'successes': 1, 'unlinks': 1, + 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + + def test_obj_put_async_update_redirection_loop(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + # run once + ts_redirect = next(self.ts_iter) + + resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + resp_headers_2 = {'Location': '/.shards_a/c_shard_2/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 3 + [(301, resp_headers_2)] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests[:3], ts_obj, policies[0]) + self._check_update_requests(conn.requests[3:], ts_obj, policies[0]) + # only *one* set of redirected requests is attempted per cycle + root_part = daemon.container_ring.get_part('a/c') + shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1') + shard_2_part = daemon.container_ring.get_part('.shards_a/c_shard_2') + shard_3_part = daemon.container_ring.get_part('.shards_a/c_shard_3') + self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3 + + ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 2, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file with most recent redirect + # response Location header value added to data + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual( + dict(orig_async_data, container_path='.shards_a/c_shard_2', + redirect_history=['.shards_a/c_shard_1', + '.shards_a/c_shard_2']), + async_data) + + # next cycle, more redirects! 
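The redirection-loop test that follows depends on the redirect_history persisted alongside container_path: a redirect is followed only if its target has not already been visited since the last fall-back to root; a repeat clears the history and sends the update back to the root container. A simplified sketch of that bookkeeping (not the updater's actual code):

def handle_redirect(async_data, new_path):
    # Follow a redirect only if we have not already been sent to that
    # location since the last fall-back to root; otherwise treat it as a
    # loop, clear the history and go back to the root container (None).
    history = async_data.setdefault('redirect_history', [])
    if new_path in history:
        async_data['container_path'] = None
        async_data['redirect_history'] = []
        return None
    history.append(new_path)
    async_data['container_path'] = new_path
    return new_path


data = {}
assert handle_redirect(data, '.shards_a/c_shard_1') == '.shards_a/c_shard_1'
assert handle_redirect(data, '.shards_a/c_shard_2') == '.shards_a/c_shard_2'
# a repeat of an already-visited location is treated as a loop
assert handle_redirect(data, '.shards_a/c_shard_1') is None
assert data == {'container_path': None, 'redirect_history': []}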
first is to previously visited location + resp_headers_3 = {'Location': '/.shards_a/c_shard_3/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests[:3], ts_obj, policies[0]) + self._check_update_requests(conn.requests[3:], ts_obj, policies[0]) + # first try the previously persisted container path, response to that + # creates a loop so ignore and send to root + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_2/o' % shard_2_part] * 3 + + ['/sda1/%s/a/c/o' % root_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 4, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file with most recent redirect + # response Location header value from root added to persisted data + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + # note: redirect_history was reset when falling back to root + self.assertEqual( + dict(orig_async_data, container_path='.shards_a/c_shard_3', + redirect_history=['.shards_a/c_shard_3']), + async_data) + + # next cycle, more redirects! first is to a location visited previously + # but not since last fall back to root, so that location IS tried; + # second is to a location visited since last fall back to root so that + # location is NOT tried + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_3/o' % shard_3_part] * 3 + + ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 6, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file, but container_path is None + # because most recent redirect location was a repeat + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual( + dict(orig_async_data, container_path=None, + redirect_history=[]), + async_data) + + # next cycle, persisted container path is None so update should go to + # root, this time it succeeds + fake_responses = [(200, {})] * 3 + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 6, 'successes': 1, 'unlinks': 1, + 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + if __name__ == '__main__': unittest.main() diff --git a/test/unit/proxy/controllers/test_base.py 
b/test/unit/proxy/controllers/test_base.py index 60d17c9ec8..93d71f6288 100644 --- a/test/unit/proxy/controllers/test_base.py +++ b/test/unit/proxy/controllers/test_base.py @@ -14,6 +14,7 @@ # limitations under the License. import itertools +import json from collections import defaultdict import unittest import mock @@ -23,11 +24,14 @@ from swift.proxy.controllers.base import headers_to_container_info, \ Controller, GetOrHeadHandler, bytes_to_skip from swift.common.swob import Request, HTTPException, RESPONSE_REASONS from swift.common import exceptions -from swift.common.utils import split_path +from swift.common.utils import split_path, ShardRange, Timestamp from swift.common.header_key_dict import HeaderKeyDict from swift.common.http import is_success from swift.common.storage_policy import StoragePolicy, StoragePolicyCollection -from test.unit import fake_http_connect, FakeRing, FakeMemcache, PatchPolicies +from test.unit import ( + fake_http_connect, FakeRing, FakeMemcache, PatchPolicies, FakeLogger, + make_timestamp_iter, + mocked_http_conn) from swift.proxy import server as proxy_server from swift.common.request_helpers import ( get_sys_meta_prefix, get_object_transient_sysmeta @@ -172,7 +176,8 @@ class TestFuncs(unittest.TestCase): def setUp(self): self.app = proxy_server.Application(None, FakeMemcache(), account_ring=FakeRing(), - container_ring=FakeRing()) + container_ring=FakeRing(), + logger=FakeLogger()) def test_get_info_zero_recheck(self): mock_cache = mock.Mock() @@ -1030,3 +1035,146 @@ class TestFuncs(unittest.TestCase): # prime numbers self.assertEqual(bytes_to_skip(11, 7), 4) self.assertEqual(bytes_to_skip(97, 7873823), 55) + + def test_get_shard_ranges_for_container_get(self): + ts_iter = make_timestamp_iter() + shard_ranges = [dict(ShardRange( + '.sharded_a/sr%d' % i, next(ts_iter), '%d_lower' % i, + '%d_upper' % i, object_count=i, bytes_used=1024 * i, + meta_timestamp=next(ts_iter))) + for i in range(3)] + base = Controller(self.app) + req = Request.blank('/v1/a/c', method='GET') + resp_headers = {'X-Backend-Record-Type': 'shard'} + with mocked_http_conn( + 200, 200, body_iter=iter(['', json.dumps(shard_ranges)]), + headers=resp_headers + ) as fake_conn: + actual = base._get_shard_ranges(req, 'a', 'c') + + # account info + captured = fake_conn.requests + self.assertEqual('HEAD', captured[0]['method']) + self.assertEqual('a', captured[0]['path'][7:]) + # container GET + self.assertEqual('GET', captured[1]['method']) + self.assertEqual('a/c', captured[1]['path'][7:]) + self.assertEqual('format=json', captured[1]['qs']) + self.assertEqual( + 'shard', captured[1]['headers'].get('X-Backend-Record-Type')) + self.assertEqual(shard_ranges, [dict(pr) for pr in actual]) + self.assertFalse(self.app.logger.get_lines_for_level('error')) + + def test_get_shard_ranges_for_object_put(self): + ts_iter = make_timestamp_iter() + shard_ranges = [dict(ShardRange( + '.sharded_a/sr%d' % i, next(ts_iter), '%d_lower' % i, + '%d_upper' % i, object_count=i, bytes_used=1024 * i, + meta_timestamp=next(ts_iter))) + for i in range(3)] + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + resp_headers = {'X-Backend-Record-Type': 'shard'} + with mocked_http_conn( + 200, 200, body_iter=iter(['', json.dumps(shard_ranges[1:2])]), + headers=resp_headers + ) as fake_conn: + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + + # account info + captured = fake_conn.requests + self.assertEqual('HEAD', captured[0]['method']) + self.assertEqual('a', captured[0]['path'][7:]) + 
# container GET + self.assertEqual('GET', captured[1]['method']) + self.assertEqual('a/c', captured[1]['path'][7:]) + params = sorted(captured[1]['qs'].split('&')) + self.assertEqual( + ['format=json', 'includes=1_test'], params) + self.assertEqual( + 'shard', captured[1]['headers'].get('X-Backend-Record-Type')) + self.assertEqual(shard_ranges[1:2], [dict(pr) for pr in actual]) + self.assertFalse(self.app.logger.get_lines_for_level('error')) + + def _check_get_shard_ranges_bad_data(self, body): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + # empty response + headers = {'X-Backend-Record-Type': 'shard'} + with mocked_http_conn(200, 200, body_iter=iter(['', body]), + headers=headers): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + lines = self.app.logger.get_lines_for_level('error') + return lines + + def test_get_shard_ranges_empty_body(self): + error_lines = self._check_get_shard_ranges_bad_data('') + self.assertIn('Problem with listing response', error_lines[0]) + self.assertIn('No JSON', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_not_a_list(self): + error_lines = self._check_get_shard_ranges_bad_data(json.dumps({})) + self.assertIn('Problem with listing response', error_lines[0]) + self.assertIn('not a list', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_key_missing(self): + error_lines = self._check_get_shard_ranges_bad_data(json.dumps([{}])) + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('KeyError', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_invalid_shard_range(self): + sr = ShardRange('a/c', Timestamp.now()) + bad_sr_data = dict(sr, name='bad_name') + error_lines = self._check_get_shard_ranges_bad_data( + json.dumps([bad_sr_data])) + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('ValueError', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_missing_record_type(self): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + sr = ShardRange('a/c', Timestamp.now()) + body = json.dumps([dict(sr)]) + with mocked_http_conn( + 200, 200, body_iter=iter(['', body])): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + error_lines = self.app.logger.get_lines_for_level('error') + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('unexpected record type', error_lines[0]) + self.assertIn('/a/c', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_wrong_record_type(self): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + sr = ShardRange('a/c', Timestamp.now()) + body = json.dumps([dict(sr)]) + headers = {'X-Backend-Record-Type': 'object'} + with mocked_http_conn( + 200, 200, body_iter=iter(['', body]), + headers=headers): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + error_lines = self.app.logger.get_lines_for_level('error') + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('unexpected record type', error_lines[0]) + self.assertIn('/a/c', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_request_failed(self): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + with mocked_http_conn(200, 404, 404, 404): + actual = base._get_shard_ranges(req, 
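The _get_shard_ranges error cases tested here boil down to three checks on the backend listing response: the X-Backend-Record-Type header must say 'shard', the body must be a JSON list, and each item must carry the keys a ShardRange needs. A standalone approximation of those checks; only a subset of ShardRange's keys is listed and the function name is invented:

import json


def parse_shard_listing(resp_headers, body,
                        required=('name', 'timestamp', 'lower', 'upper')):
    # Return (data, None) on success, or (None, reason) so the caller can
    # log the problem and fall back, mirroring the tests above.
    if resp_headers.get('X-Backend-Record-Type') != 'shard':
        return None, 'unexpected record type'
    try:
        data = json.loads(body)
    except ValueError:
        return None, 'No JSON in listing response'
    if not isinstance(data, list):
        return None, 'shard range listing not a list'
    for item in data:
        missing = [k for k in required if k not in item]
        if missing:
            return None, 'missing keys: %s' % missing
    return data, None


ok, err = parse_shard_listing(
    {'X-Backend-Record-Type': 'shard'},
    '[{"name": "a/c", "timestamp": "1", "lower": "", "upper": ""}]')
assert err is None and len(ok) == 1
assert parse_shard_listing({}, '[]')[1] == 'unexpected record type'
assert parse_shard_listing({'X-Backend-Record-Type': 'shard'},
                           '')[1] == 'No JSON in listing response'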
'a', 'c', '1_test') + self.assertIsNone(actual) + self.assertFalse(self.app.logger.get_lines_for_level('error')) + warning_lines = self.app.logger.get_lines_for_level('warning') + self.assertIn('Failed to get container listing', warning_lines[0]) + self.assertIn('/a/c', warning_lines[0]) + self.assertFalse(warning_lines[1:]) diff --git a/test/unit/proxy/controllers/test_container.py b/test/unit/proxy/controllers/test_container.py index 03d53c2fde..ae44f8b001 100644 --- a/test/unit/proxy/controllers/test_container.py +++ b/test/unit/proxy/controllers/test_container.py @@ -12,17 +12,24 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import json import mock import socket import unittest from eventlet import Timeout +from six.moves import urllib +from swift.common.constraints import CONTAINER_LISTING_LIMIT from swift.common.swob import Request +from swift.common.utils import ShardRange, Timestamp from swift.proxy import server as proxy_server -from swift.proxy.controllers.base import headers_to_container_info, Controller -from test.unit import fake_http_connect, FakeRing, FakeMemcache +from swift.proxy.controllers.base import headers_to_container_info, Controller, \ + get_container_info +from test import annotate_failure +from test.unit import fake_http_connect, FakeRing, FakeMemcache, \ + make_timestamp_iter from swift.common.storage_policy import StoragePolicy from swift.common.request_helpers import get_sys_meta_prefix @@ -72,6 +79,7 @@ class TestContainerController(TestRingBase): new=FakeAccountInfoContainerController): return _orig_get_controller(*args, **kwargs) self.app.get_controller = wrapped_get_controller + self.ts_iter = make_timestamp_iter() def _make_callback_func(self, context): def callback(ipaddr, port, device, partition, method, path, @@ -151,6 +159,91 @@ class TestContainerController(TestRingBase): for key in owner_headers: self.assertIn(key, resp.headers) + def test_reseller_admin(self): + reseller_internal_headers = { + get_sys_meta_prefix('container') + 'sharding': 'True'} + reseller_external_headers = {'x-container-sharding': 'on'} + controller = proxy_server.ContainerController(self.app, 'a', 'c') + + # Normal users, even swift owners, can't set it + req = Request.blank('/v1/a/c', method='PUT', + headers=reseller_external_headers, + environ={'swift_owner': True}) + with mocked_http_conn(*[201] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertNotIn(key.title(), captured['headers']) + + req = Request.blank('/v1/a/c', method='POST', + headers=reseller_external_headers, + environ={'swift_owner': True}) + with mocked_http_conn(*[204] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertNotIn(key.title(), captured['headers']) + + req = Request.blank('/v1/a/c', environ={'swift_owner': True}) + # Heck, they don't even get to know + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.HEAD(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertNotIn(key, resp.headers) + + with mock.patch('swift.proxy.controllers.base.http_connect', + 
fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.GET(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertNotIn(key, resp.headers) + + # But reseller admins can set it + req = Request.blank('/v1/a/c', method='PUT', + headers=reseller_external_headers, + environ={'reseller_request': True}) + with mocked_http_conn(*[201] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertIn(key.title(), captured['headers']) + + req = Request.blank('/v1/a/c', method='POST', + headers=reseller_external_headers, + environ={'reseller_request': True}) + with mocked_http_conn(*[204] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertIn(key.title(), captured['headers']) + + # And see that they have + req = Request.blank('/v1/a/c', environ={'reseller_request': True}) + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.HEAD(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertIn(key, resp.headers) + self.assertEqual(resp.headers[key], 'True') + + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.GET(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertEqual(resp.headers[key], 'True') + def test_sys_meta_headers_PUT(self): # check that headers in sys meta namespace make it through # the container controller @@ -329,6 +422,852 @@ class TestContainerController(TestRingBase): ] self._assert_responses('POST', POST_TEST_CASES) + def _make_shard_objects(self, shard_range): + lower = ord(shard_range.lower[0]) if shard_range.lower else ord('@') + upper = ord(shard_range.upper[0]) if shard_range.upper else ord('z') + + objects = [{'name': chr(i), 'bytes': i, 'hash': 'hash%s' % chr(i), + 'content_type': 'text/plain', 'deleted': 0, + 'last_modified': next(self.ts_iter).isoformat} + for i in range(lower + 1, upper + 1)] + return objects + + def _check_GET_shard_listing(self, mock_responses, expected_objects, + expected_requests, query_string='', + reverse=False): + # mock_responses is a list of tuples (status, json body, headers) + # expected objects is a list of dicts + # expected_requests is a list of tuples (path, hdrs dict, params dict) + + # sanity check that expected objects is name ordered with no repeats + def name(obj): + return obj.get('name', obj.get('subdir')) + + for (prev, next_) in zip(expected_objects, expected_objects[1:]): + if reverse: + self.assertGreater(name(prev), name(next_)) + else: + self.assertLess(name(prev), name(next_)) + container_path = '/v1/a/c' + query_string + codes = (resp[0] for resp in mock_responses) + bodies = iter([json.dumps(resp[1]) for resp in mock_responses]) + exp_headers = [resp[2] for resp in mock_responses] + request = Request.blank(container_path) + with mocked_http_conn( + *codes, body_iter=bodies, headers=exp_headers) as fake_conn: + resp = request.get_response(self.app) + for backend_req in fake_conn.requests: + self.assertEqual(request.headers['X-Trans-Id'], + 
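test_reseller_admin checks that the user-facing X-Container-Sharding header is translated to and from container sysmeta only for reseller requests; ordinary users, even swift owners, can neither set it nor see it. A rough sketch of that gating, passing the value through unchanged (the test shows the real controller also normalizes truthy inputs such as 'on' to 'True'):

SYSMETA = 'X-Container-Sysmeta-Sharding'
EXTERNAL = 'X-Container-Sharding'


def inbound_headers(headers, reseller_request):
    # Only a reseller admin may set the sharding flag; for everyone else
    # the external header is silently dropped before hitting the backend.
    out = {k: v for k, v in headers.items() if k != EXTERNAL}
    if reseller_request and EXTERNAL in headers:
        out[SYSMETA] = headers[EXTERNAL]
    return out


def outbound_headers(backend_headers, reseller_request):
    # The sysmeta value is only reflected back to reseller admins.
    out = {k: v for k, v in backend_headers.items() if k != SYSMETA}
    if reseller_request and SYSMETA in backend_headers:
        out[EXTERNAL] = backend_headers[SYSMETA]
    return out


assert inbound_headers({EXTERNAL: 'on'}, reseller_request=False) == {}
assert inbound_headers({EXTERNAL: 'on'}, reseller_request=True) == \
    {SYSMETA: 'on'}
assert EXTERNAL not in outbound_headers({SYSMETA: 'True'}, False)
assert outbound_headers({SYSMETA: 'True'}, True) == {EXTERNAL: 'True'}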
backend_req['headers']['X-Trans-Id']) + self.assertTrue(backend_req['headers']['User-Agent'].startswith( + 'proxy-server')) + self.assertEqual(200, resp.status_int) + actual_objects = json.loads(resp.body) + self.assertEqual(len(expected_objects), len(actual_objects)) + self.assertEqual(expected_objects, actual_objects) + self.assertEqual(len(expected_requests), len(fake_conn.requests)) + for i, ((exp_path, exp_headers, exp_params), req) in enumerate( + zip(expected_requests, fake_conn.requests)): + with annotate_failure('Request check at index %d.' % i): + # strip off /sdx/0/ from path + self.assertEqual(exp_path, req['path'][7:]) + self.assertEqual( + dict(exp_params, format='json'), + dict(urllib.parse.parse_qsl(req['qs'], True))) + for k, v in exp_headers.items(): + self.assertIn(k, req['headers']) + self.assertEqual(v, req['headers'][k]) + self.assertNotIn('X-Backend-Override-Delete', req['headers']) + return resp + + def check_response(self, resp, root_resp_hdrs, expected_objects=None): + info_hdrs = dict(root_resp_hdrs) + if expected_objects is None: + # default is to expect whatever the root container sent + expected_obj_count = root_resp_hdrs['X-Container-Object-Count'] + expected_bytes_used = root_resp_hdrs['X-Container-Bytes-Used'] + else: + expected_bytes_used = sum([o['bytes'] for o in expected_objects]) + expected_obj_count = len(expected_objects) + info_hdrs['X-Container-Bytes-Used'] = expected_bytes_used + info_hdrs['X-Container-Object-Count'] = expected_obj_count + self.assertEqual(expected_bytes_used, + int(resp.headers['X-Container-Bytes-Used'])) + self.assertEqual(expected_obj_count, + int(resp.headers['X-Container-Object-Count'])) + self.assertEqual('sharded', resp.headers['X-Backend-Sharding-State']) + for k, v in root_resp_hdrs.items(): + if k.lower().startswith('x-container-meta'): + self.assertEqual(v, resp.headers[k]) + # check that info cache is correct for root container + info = get_container_info(resp.request.environ, self.app) + self.assertEqual(headers_to_container_info(info_hdrs), info) + + def test_GET_sharded_container(self): + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + expected_objects = all_objects + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + # pretend root object stats are not yet updated + 'X-Container-Object-Count': num_all_objects - 1, + 'X-Container-Bytes-Used': size_all_objects - 1, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + # GET all objects + # include some failed responses + mock_responses = [ + # status, body, headers + (404, '', {}), + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], 
shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', limit=str(limit), + states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # GET all objects - sharding, final shard range points back to root + root_range = ShardRange('a/c', Timestamp.now(), 'pie', '') + mock_responses = [ + # status, body, headers + (200, sr_dicts[:2] + [dict(root_range)], root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], root_resp_hdrs) + ] + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', limit=str(limit), + states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (root_range.name, {'X-Backend-Record-Type': 'object'}, + dict(marker='p', end_marker='', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # GET all objects in reverse + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts)), root_shard_resp_hdrs), + (200, list(reversed(sr_objs[2])), shard_resp_hdrs[2]), + (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]), + (200, list(reversed(sr_objs[0])), shard_resp_hdrs[0]), + ] + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', reverse='true')), + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='pie', reverse='true', + limit=str(limit), states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='q', end_marker='ham', states='listing', + reverse='true', limit=str(limit - len(sr_objs[2])))), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='i', end_marker='', states='listing', reverse='true', + limit=str(limit - len(sr_objs[2] + sr_objs[1])))), # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, list(reversed(expected_objects)), + expected_requests, query_string='?reverse=true', reverse=True) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # GET with limit param + limit = len(sr_objs[0]) + 
len(sr_objs[1]) + 1 + expected_objects = all_objects[:limit] + mock_responses = [ + (404, '', {}), + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2][:1], shard_resp_hdrs[2]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(limit=str(limit), states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(limit=str(limit), states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?limit=%s' % limit) + self.check_response(resp, root_resp_hdrs) + + # GET with marker + marker = sr_objs[1][2]['name'] + first_included = len(sr_objs[0]) + 2 + limit = CONTAINER_LISTING_LIMIT + expected_objects = all_objects[first_included:] + mock_responses = [ + (404, '', {}), + (200, sr_dicts[1:], root_shard_resp_hdrs), + (404, '', {}), + (200, sr_objs[1][2:], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(marker=marker, states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(marker=marker, states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 404 + dict(marker=marker, end_marker='pie\x00', states='listing', + limit=str(limit))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker=marker, end_marker='pie\x00', states='listing', + limit=str(limit))), + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[1][2:])))), + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?marker=%s' % marker) + self.check_response(resp, root_resp_hdrs) + + # GET with end marker + end_marker = sr_objs[1][6]['name'] + first_excluded = len(sr_objs[0]) + 6 + expected_objects = all_objects[:first_excluded] + mock_responses = [ + (404, '', {}), + (200, sr_dicts[:2], root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (404, '', {}), + (200, sr_objs[1][:6], shard_resp_hdrs[1]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(end_marker=end_marker, states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(end_marker=end_marker, states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 404 + dict(marker='h', end_marker=end_marker, states='listing', + limit=str(limit - len(sr_objs[0])))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='h', end_marker=end_marker, states='listing', + limit=str(limit - len(sr_objs[0])))), + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?end_marker=%s' % 
end_marker) + self.check_response(resp, root_resp_hdrs) + + # marker and end_marker and limit + limit = 2 + expected_objects = all_objects[first_included:first_excluded] + mock_responses = [ + (200, sr_dicts[1:2], root_shard_resp_hdrs), + (200, sr_objs[1][2:6], shard_resp_hdrs[1]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', limit=str(limit), + marker=marker, end_marker=end_marker)), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker=marker, end_marker=end_marker, states='listing', + limit=str(limit))), + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?marker=%s&end_marker=%s&limit=%s' + % (marker, end_marker, limit)) + self.check_response(resp, root_resp_hdrs) + + # reverse with marker, end_marker + expected_objects.reverse() + mock_responses = [ + (200, sr_dicts[1:2], root_shard_resp_hdrs), + (200, list(reversed(sr_objs[1][2:6])), shard_resp_hdrs[1]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(marker=end_marker, reverse='true', end_marker=marker, + limit=str(limit), states='listing',)), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker=end_marker, end_marker=marker, states='listing', + limit=str(limit), reverse='true')), + ] + self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?marker=%s&end_marker=%s&limit=%s&reverse=true' + % (end_marker, marker, limit), reverse=True) + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_with_delimiter(self): + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + shard_resp_hdrs = {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': 2, + 'X-Container-Bytes-Used': 4, + 'X-Backend-Storage-Policy-Index': 0} + + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + # pretend root object stats are not yet updated + 'X-Container-Object-Count': 6, + 'X-Container-Bytes-Used': 12, + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + sr_0_obj = {'name': 'apple', + 'bytes': 1, + 'hash': 'hash', + 'content_type': 'text/plain', + 'deleted': 0, + 'last_modified': next(self.ts_iter).isoformat} + sr_2_obj = {'name': 'pumpkin', + 'bytes': 1, + 'hash': 'hash', + 'content_type': 'text/plain', + 'deleted': 0, + 'last_modified': next(self.ts_iter).isoformat} + subdir = {'subdir': 'ha/'} + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, [sr_0_obj, subdir], shard_resp_hdrs), + (200, [], shard_resp_hdrs), + (200, [sr_2_obj], shard_resp_hdrs) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', delimiter='/')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', limit=str(limit), + states='listing', delimiter='/')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='ha/', end_marker='pie\x00', states='listing', + limit=str(limit - 2), delimiter='/')), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='ha/', end_marker='', states='listing', + 
limit=str(limit - 2), delimiter='/')) # 200 + ] + + expected_objects = [sr_0_obj, subdir, sr_2_obj] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?delimiter=/') + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_overlapping_shards(self): + # verify ordered listing even if unexpected overlapping shard ranges + shard_bounds = (('', 'ham', ShardRange.CLEAVED), + ('', 'pie', ShardRange.ACTIVE), + ('lemon', '', ShardRange.ACTIVE)) + shard_ranges = [ + ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper, + state=state) + for lower, upper, state in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + # pretend root object stats are not yet updated + 'X-Container-Object-Count': num_all_objects - 1, + 'X-Container-Bytes-Used': size_all_objects - 1, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + # forwards listing + + # expect subset of second shard range + objs_1 = [o for o in sr_objs[1] if o['name'] > sr_objs[0][-1]['name']] + # expect subset of third shard range + objs_2 = [o for o in sr_objs[2] if o['name'] > sr_objs[1][-1]['name']] + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, objs_1, shard_resp_hdrs[1]), + (200, objs_2, shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + objs_1)))) # 200 + ] + + expected_objects = sr_objs[0] + objs_1 + objs_2 + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # reverse listing + + # expect subset of third shard range + objs_0 = [o for o in sr_objs[0] if o['name'] < sr_objs[1][0]['name']] + # expect subset of second shard range + objs_1 = [o for o in sr_objs[1] if o['name'] < sr_objs[2][0]['name']] + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts)), root_shard_resp_hdrs), + (200, list(reversed(sr_objs[2])), shard_resp_hdrs[2]), + (200, list(reversed(objs_1)), shard_resp_hdrs[1]), + 
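The forward-listing expectations above spell out the contract these tests keep re-asserting: every shard GET reuses X-Backend-Record-Type: auto, the marker advances to the last object name already returned (which is also what prevents duplicates when ranges overlap), the end_marker is the shard's upper bound with a trailing NUL so the bound itself is included, and the limit shrinks by the number of objects gathered so far. A minimal sketch of that walk, using stand-in names rather than the proxy's real helpers:

    # Illustrative only: get_objects stands in for the per-shard backend GET
    # the proxy issues; shard_ranges is the list of ShardRange dicts returned
    # by the root container, already sorted for a forward listing.
    def walk_shard_ranges(shard_ranges, get_objects, limit):
        listing = []
        marker = ''
        for sr in shard_ranges:
            if len(listing) >= limit:
                break
            # trailing NUL keeps the shard's own upper bound in range,
            # since end_marker is exclusive
            end_marker = sr['upper'] + '\x00' if sr['upper'] else ''
            objs = get_objects(sr['name'], marker=marker,
                               end_marker=end_marker,
                               limit=limit - len(listing))
            listing.extend(objs)
            if objs:
                # marker always advances to the last object name listed
                marker = objs[-1]['name']
        return listing

A reverse listing mirrors this with the shard ranges iterated in reverse order and each end_marker set to the range's lower bound, which is exactly what the reverse-listing expectations in these tests check.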
(200, list(reversed(objs_0)), shard_resp_hdrs[0]), + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', reverse='true')), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='lemon', states='listing', + limit=str(limit), + reverse='true')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='m', end_marker='', reverse='true', states='listing', + limit=str(limit - len(sr_objs[2])))), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='A', end_marker='', reverse='true', states='listing', + limit=str(limit - len(sr_objs[2] + objs_1)))) # 200 + ] + + expected_objects = list(reversed(objs_0 + objs_1 + sr_objs[2])) + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?reverse=true', reverse=True) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + def test_GET_sharded_container_gap_in_shards(self): + # verify ordered listing even if unexpected gap between shard ranges + shard_bounds = (('', 'ham'), ('onion', 'pie'), ('rhubarb', '')) + shard_ranges = [ + ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, all_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_empty_shard(self): + # verify ordered 
listing when a shard is empty + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('lemon', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + # empty second shard range + sr_objs[1] = [] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, all_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + # marker in empty second range + mock_responses = [ + # status, body, headers + (200, sr_dicts[1:], root_shard_resp_hdrs), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker unchanged when getting from third range + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', marker='koolaid')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='pie\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='', states='listing', + limit=str(limit))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, sr_objs[2], expected_requests, + query_string='?marker=koolaid') + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + # marker in empty second range, reverse + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts[:2])), root_shard_resp_hdrs), + (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]), + (200, list(reversed(sr_objs[0])), shard_resp_hdrs[2]) + ] + # NB marker unchanged when getting 
from first range + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', marker='koolaid', reverse='true')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='ham', reverse='true', + states='listing', limit=str(limit))), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='', reverse='true', + states='listing', limit=str(limit))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, list(reversed(sr_objs[0])), expected_requests, + query_string='?marker=koolaid&reverse=true', reverse=True) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + def _check_GET_sharded_container_shard_error(self, error): + # verify ordered listing when a shard is empty + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('lemon', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + # empty second shard range + sr_objs[1] = [] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0])] + \ + [(error, [], {})] * 2 * self.CONTAINER_REPLICAS + \ + [(200, sr_objs[2], shard_resp_hdrs[2])] + + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit)))] \ + + [(shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))) + ] * 2 * self.CONTAINER_REPLICAS \ + + [(shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1]))))] + + resp = self._check_GET_shard_listing( + mock_responses, all_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_shard_errors(self): + self._check_GET_sharded_container_shard_error(404) + self._check_GET_sharded_container_shard_error(500) + + def test_GET_sharded_container_sharding_shard(self): + # one shard is in process of sharding + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + 
ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + shard_1_shard_resp_hdrs = dict(shard_resp_hdrs[1]) + shard_1_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + # second shard is sharding and has cleaved two out of three sub shards + shard_resp_hdrs[1]['X-Backend-Sharding-State'] = 'sharding' + sub_shard_bounds = (('ham', 'juice'), ('juice', 'lemon')) + sub_shard_ranges = [ + ShardRange('a/c_sub_' + upper, Timestamp.now(), lower, upper) + for lower, upper in sub_shard_bounds] + sub_sr_dicts = [dict(sr) for sr in sub_shard_ranges] + sub_sr_objs = [self._make_shard_objects(sr) for sr in sub_shard_ranges] + sub_shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sub_sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sub_sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(2)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sub_sr_dicts + [sr_dicts[1]], shard_1_shard_resp_hdrs), + (200, sub_sr_objs[0], sub_shard_resp_hdrs[0]), + (200, sub_sr_objs[1], sub_shard_resp_hdrs[1]), + (200, sr_objs[1][len(sub_sr_objs[0] + sub_sr_objs[1]):], + shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # get root shard ranges + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + # get first shard objects + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + # get second shard sub-shard ranges + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), + # get first sub-shard objects + (sub_shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='juice\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), + # get second sub-shard objects + (sub_shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='j', end_marker='lemon\x00', states='listing', + limit=str(limit - len(sr_objs[0] + sub_sr_objs[0])))), + # get remainder of first shard objects + (shard_ranges[1].name, {'X-Backend-Record-Type': 'object'}, + dict(marker='l', end_marker='pie\x00', + limit=str(limit - len(sr_objs[0] + sub_sr_objs[0] + + sub_sr_objs[1])))), # 200 + # get third shard 
objects + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + expected_objects = ( + sr_objs[0] + sub_sr_objs[0] + sub_sr_objs[1] + + sr_objs[1][len(sub_sr_objs[0] + sub_sr_objs[1]):] + sr_objs[2]) + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + @patch_policies( [StoragePolicy(0, 'zero', True, object_ring=FakeRing(replicas=4))]) diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index bee74c380a..8e67abb009 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -47,7 +47,7 @@ from eventlet.green import httplib from six import BytesIO from six import StringIO from six.moves import range -from six.moves.urllib.parse import quote +from six.moves.urllib.parse import quote, parse_qsl from test import listen_zero from test.unit import ( @@ -3222,95 +3222,197 @@ class TestReplicatedObjectController( # reset the router post patch_policies self.app.obj_controller_router = proxy_server.ObjectControllerRouter() self.app.sort_nodes = lambda nodes, *args, **kwargs: nodes - backend_requests = [] - def capture_requests(ip, port, method, path, headers, *args, - **kwargs): - backend_requests.append((method, path, headers)) + def do_test(resp_headers): + self.app.memcache.store = {} + backend_requests = [] - req = Request.blank('/v1/a/c/o', {}, method='POST', - headers={'X-Object-Meta-Color': 'Blue', - 'Content-Type': 'text/plain'}) + def capture_requests(ip, port, method, path, headers, *args, + **kwargs): + backend_requests.append((method, path, headers)) - # we want the container_info response to says a policy index of 1 - resp_headers = {'X-Backend-Storage-Policy-Index': 1} - with mocked_http_conn( - 200, 200, 202, 202, 202, - headers=resp_headers, give_connect=capture_requests - ) as fake_conn: - resp = req.get_response(self.app) - self.assertRaises(StopIteration, fake_conn.code_iter.next) + req = Request.blank('/v1/a/c/o', {}, method='POST', + headers={'X-Object-Meta-Color': 'Blue', + 'Content-Type': 'text/plain'}) - self.assertEqual(resp.status_int, 202) - self.assertEqual(len(backend_requests), 5) + # we want the container_info response to says a policy index of 1 + with mocked_http_conn( + 200, 200, 202, 202, 202, + headers=resp_headers, give_connect=capture_requests + ) as fake_conn: + resp = req.get_response(self.app) + self.assertRaises(StopIteration, fake_conn.code_iter.next) - def check_request(req, method, path, headers=None): - req_method, req_path, req_headers = req - self.assertEqual(method, req_method) - # caller can ignore leading path parts - self.assertTrue(req_path.endswith(path), - 'expected path to end with %s, it was %s' % ( - path, req_path)) - headers = headers or {} - # caller can ignore some headers - for k, v in headers.items(): - self.assertEqual(req_headers[k], v) - account_request = backend_requests.pop(0) - check_request(account_request, method='HEAD', path='/sda/0/a') - container_request = backend_requests.pop(0) - check_request(container_request, method='HEAD', path='/sda/0/a/c') - # make sure backend requests included expected container headers - container_headers = {} - for request in backend_requests: - req_headers = request[2] - device = req_headers['x-container-device'] - host = req_headers['x-container-host'] - 
container_headers[device] = host - expectations = { - 'method': 'POST', - 'path': '/0/a/c/o', - 'headers': { - 'X-Container-Partition': '0', - 'Connection': 'close', - 'User-Agent': 'proxy-server %s' % os.getpid(), - 'Host': 'localhost:80', - 'Referer': 'POST http://localhost/v1/a/c/o', - 'X-Object-Meta-Color': 'Blue', - 'X-Backend-Storage-Policy-Index': '1' - }, - } - check_request(request, **expectations) + self.assertEqual(resp.status_int, 202) + self.assertEqual(len(backend_requests), 5) - expected = {} - for i, device in enumerate(['sda', 'sdb', 'sdc']): - expected[device] = '10.0.0.%d:100%d' % (i, i) - self.assertEqual(container_headers, expected) + def check_request(req, method, path, headers=None): + req_method, req_path, req_headers = req + self.assertEqual(method, req_method) + # caller can ignore leading path parts + self.assertTrue(req_path.endswith(path), + 'expected path to end with %s, it was %s' % ( + path, req_path)) + headers = headers or {} + # caller can ignore some headers + for k, v in headers.items(): + self.assertEqual(req_headers[k], v) + self.assertNotIn('X-Backend-Container-Path', req_headers) - # and again with policy override - self.app.memcache.store = {} - backend_requests = [] - req = Request.blank('/v1/a/c/o', {}, method='POST', - headers={'X-Object-Meta-Color': 'Blue', - 'Content-Type': 'text/plain', - 'X-Backend-Storage-Policy-Index': 0}) - with mocked_http_conn( - 200, 200, 202, 202, 202, - headers=resp_headers, give_connect=capture_requests - ) as fake_conn: - resp = req.get_response(self.app) - self.assertRaises(StopIteration, fake_conn.code_iter.next) - self.assertEqual(resp.status_int, 202) - self.assertEqual(len(backend_requests), 5) - for request in backend_requests[2:]: - expectations = { - 'method': 'POST', - 'path': '/0/a/c/o', # ignore device bit - 'headers': { - 'X-Object-Meta-Color': 'Blue', - 'X-Backend-Storage-Policy-Index': '0', + account_request = backend_requests.pop(0) + check_request(account_request, method='HEAD', path='/sda/0/a') + container_request = backend_requests.pop(0) + check_request(container_request, method='HEAD', path='/sda/0/a/c') + # make sure backend requests included expected container headers + container_headers = {} + for request in backend_requests: + req_headers = request[2] + device = req_headers['x-container-device'] + host = req_headers['x-container-host'] + container_headers[device] = host + expectations = { + 'method': 'POST', + 'path': '/0/a/c/o', + 'headers': { + 'X-Container-Partition': '0', + 'Connection': 'close', + 'User-Agent': 'proxy-server %s' % os.getpid(), + 'Host': 'localhost:80', + 'Referer': 'POST http://localhost/v1/a/c/o', + 'X-Object-Meta-Color': 'Blue', + 'X-Backend-Storage-Policy-Index': '1' + }, } - } - check_request(request, **expectations) + check_request(request, **expectations) + + expected = {} + for i, device in enumerate(['sda', 'sdb', 'sdc']): + expected[device] = '10.0.0.%d:100%d' % (i, i) + self.assertEqual(container_headers, expected) + + # and again with policy override + self.app.memcache.store = {} + backend_requests = [] + req = Request.blank('/v1/a/c/o', {}, method='POST', + headers={'X-Object-Meta-Color': 'Blue', + 'Content-Type': 'text/plain', + 'X-Backend-Storage-Policy-Index': 0}) + with mocked_http_conn( + 200, 200, 202, 202, 202, + headers=resp_headers, give_connect=capture_requests + ) as fake_conn: + resp = req.get_response(self.app) + self.assertRaises(StopIteration, fake_conn.code_iter.next) + self.assertEqual(resp.status_int, 202) + 
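For context on the five mocked backend responses used throughout this test: an object POST is served by one account HEAD and one container HEAD (to build container_info), followed by a POST to each of the three object replicas, and each object-server request carries X-Container-Partition, X-Container-Host and X-Container-Device so the object server can issue the asynchronous container update. A rough sketch of assembling those headers for one container replica (stand-in names, not the proxy's actual code):

    # Illustrative only: node is one container-ring node dict with 'ip',
    # 'port' and 'device' keys, as produced by the FakeRing in these tests.
    def container_update_headers(node, container_partition):
        return {
            'X-Container-Partition': str(container_partition),
            'X-Container-Host': '%(ip)s:%(port)s' % node,
            'X-Container-Device': node['device'],
        }

The expected device-to-host map built at the end of do_test is just this mapping collected across the three container replicas.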
self.assertEqual(len(backend_requests), 5) + for request in backend_requests[2:]: + expectations = { + 'method': 'POST', + 'path': '/0/a/c/o', # ignore device bit + 'headers': { + 'X-Object-Meta-Color': 'Blue', + 'X-Backend-Storage-Policy-Index': '0', + } + } + check_request(request, **expectations) + + resp_headers = {'X-Backend-Storage-Policy-Index': 1} + do_test(resp_headers) + resp_headers['X-Backend-Sharding-State'] = 'unsharded' + do_test(resp_headers) + + @patch_policies([ + StoragePolicy(0, 'zero', is_default=True, object_ring=FakeRing()), + StoragePolicy(1, 'one', object_ring=FakeRing()), + ]) + def test_backend_headers_update_shard_container(self): + # verify that when container is sharded the backend container update is + # directed to the shard container + # reset the router post patch_policies + self.app.obj_controller_router = proxy_server.ObjectControllerRouter() + self.app.sort_nodes = lambda nodes, *args, **kwargs: nodes + + def do_test(method, sharding_state): + self.app.memcache.store = {} + req = Request.blank('/v1/a/c/o', {}, method=method, body='', + headers={'Content-Type': 'text/plain'}) + + # we want the container_info response to say policy index of 1 and + # sharding state + # acc HEAD, cont HEAD, cont shard GET, obj POSTs + status_codes = (200, 200, 200, 202, 202, 202) + resp_headers = {'X-Backend-Storage-Policy-Index': 1, + 'x-backend-sharding-state': sharding_state, + 'X-Backend-Record-Type': 'shard'} + shard_range = utils.ShardRange( + '.shards_a/c_shard', utils.Timestamp.now(), 'l', 'u') + body = json.dumps([dict(shard_range)]) + with mocked_http_conn(*status_codes, headers=resp_headers, + body=body) as fake_conn: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 202) + backend_requests = fake_conn.requests + + def check_request(req, method, path, headers=None, params=None): + self.assertEqual(method, req['method']) + # caller can ignore leading path parts + self.assertTrue(req['path'].endswith(path), + 'expected path to end with %s, it was %s' % ( + path, req['path'])) + headers = headers or {} + # caller can ignore some headers + for k, v in headers.items(): + self.assertEqual(req['headers'][k], v, + 'Expected %s but got %s for key %s' % + (v, req['headers'][k], k)) + params = params or {} + req_params = dict(parse_qsl(req['qs'])) if req['qs'] else {} + for k, v in params.items(): + self.assertEqual(req_params[k], v, + 'Expected %s but got %s for key %s' % + (v, req_params[k], k)) + + account_request = backend_requests[0] + check_request(account_request, method='HEAD', path='/sda/0/a') + container_request = backend_requests[1] + check_request(container_request, method='HEAD', path='/sda/0/a/c') + container_request_shard = backend_requests[2] + check_request( + container_request_shard, method='GET', path='/sda/0/a/c', + params={'includes': 'o'}) + + # make sure backend requests included expected container headers + container_headers = {} + + for request in backend_requests[3:]: + req_headers = request['headers'] + device = req_headers['x-container-device'] + container_headers[device] = req_headers['x-container-host'] + expectations = { + 'method': method, + 'path': '/0/a/c/o', + 'headers': { + 'X-Container-Partition': '0', + 'Host': 'localhost:80', + 'Referer': '%s http://localhost/v1/a/c/o' % method, + 'X-Backend-Storage-Policy-Index': '1', + 'X-Backend-Container-Path': shard_range.name + }, + } + check_request(request, **expectations) + + expected = {} + for i, device in enumerate(['sda', 'sdb', 'sdc']): + expected[device] = 
'10.0.0.%d:100%d' % (i, i) + self.assertEqual(container_headers, expected) + + do_test('POST', 'sharding') + do_test('POST', 'sharded') + do_test('DELETE', 'sharding') + do_test('DELETE', 'sharded') + do_test('PUT', 'sharding') + do_test('PUT', 'sharded') def test_DELETE(self): with save_globals(): @@ -8356,6 +8458,29 @@ class TestContainerController(unittest.TestCase): self.assertEqual(res.content_length, 0) self.assertNotIn('transfer-encoding', res.headers) + def test_GET_account_non_existent(self): + with save_globals(): + set_http_connect(404, 404, 404) + controller = proxy_server.ContainerController(self.app, 'a', 'c') + req = Request.blank('/v1/a/c') + self.app.update_request(req) + res = controller.GET(req) + self.assertEqual(res.status_int, 404) + self.assertNotIn('container/a/c', res.environ['swift.infocache']) + + def test_GET_auto_create_prefix_account_non_existent(self): + with save_globals(): + set_http_connect(404, 404, 404, 204, 204, 204) + controller = proxy_server.ContainerController(self.app, '.a', 'c') + req = Request.blank('/v1/a/c') + self.app.update_request(req) + res = controller.GET(req) + self.assertEqual(res.status_int, 204) + ic = res.environ['swift.infocache'] + self.assertEqual(ic['container/.a/c']['status'], 204) + self.assertEqual(res.content_length, 0) + self.assertNotIn('transfer-encoding', res.headers) + def test_GET_calls_authorize(self): called = [False]
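Taken together with the assertNotIn('X-Backend-Container-Path', ...) check in the refactored POST test above, the new test_backend_headers_update_shard_container pins down the update-redirection behaviour: when container_info reports a sharding state of 'sharding' or 'sharded', the proxy first asks the root container which shard range includes the object (the GET with includes=o captured as backend_requests[2]) and then stamps that shard's name into X-Backend-Container-Path on every object-server request, so the eventual container update lands in the shard rather than the root. A compact sketch of that decision, with hypothetical arguments rather than the proxy's real helpers:

    # Illustrative only: container_info mirrors the mocked HEAD headers
    # ('sharding_state' is assumed here as the key derived from
    # X-Backend-Sharding-State), and find_shard stands in for the root
    # container GET with ?includes=<object name>.
    def container_update_path(container_info, find_shard):
        if container_info.get('sharding_state') not in ('sharding', 'sharded'):
            return None  # unsharded: update the root container directly
        shard_range = find_shard()
        return str(shard_range.name) if shard_range else None

The returned name is what the expectations above require in the X-Backend-Container-Path header for both the 'sharding' and 'sharded' cases, across POST, DELETE and PUT.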