diff --git a/bin/swift-container-sharder b/bin/swift-container-sharder
new file mode 100755
index 0000000000..3e6551319b
--- /dev/null
+++ b/bin/swift-container-sharder
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# Copyright (c) 2010-2015 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from swift.container.sharder import ContainerSharder
+from swift.common.utils import parse_options
+from swift.common.daemon import run_daemon
+from optparse import OptionParser
+
+if __name__ == '__main__':
+ parser = OptionParser("%prog CONFIG [options]")
+ parser.add_option('-d', '--devices',
+ help='Shard containers only on given devices. '
+ 'Comma-separated list. '
+ 'Only has effect if --once is used.')
+ parser.add_option('-p', '--partitions',
+ help='Shard containers only in given partitions. '
+ 'Comma-separated list. '
+ 'Only has effect if --once is used.')
+ conf_file, options = parse_options(parser=parser, once=True)
+ run_daemon(ContainerSharder, conf_file, **options)
diff --git a/doc/saio/swift/container-server/1.conf b/doc/saio/swift/container-server/1.conf
index 5bf3c0f28c..e71a5b6683 100644
--- a/doc/saio/swift/container-server/1.conf
+++ b/doc/saio/swift/container-server/1.conf
@@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port}
[container-auditor]
[container-sync]
+
+[container-sharder]
+auto_shard = true
+rsync_module = {replication_ip}::container{replication_port}
+# This is intentionally much smaller than the default of 1,000,000 so tests
+# can run in a reasonable amount of time
+shard_container_threshold = 100
+# The probe tests make explicit assumptions about the batch sizes
+shard_scanner_batch_size = 10
+cleave_batch_size = 2
diff --git a/doc/saio/swift/container-server/2.conf b/doc/saio/swift/container-server/2.conf
index 0b29ada029..86e58a9fde 100644
--- a/doc/saio/swift/container-server/2.conf
+++ b/doc/saio/swift/container-server/2.conf
@@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port}
[container-auditor]
[container-sync]
+
+[container-sharder]
+auto_shard = true
+rsync_module = {replication_ip}::container{replication_port}
+# This is intentionally much smaller than the default of 1,000,000 so tests
+# can run in a reasonable amount of time
+shard_container_threshold = 100
+# The probe tests make explicit assumptions about the batch sizes
+shard_scanner_batch_size = 10
+cleave_batch_size = 2
diff --git a/doc/saio/swift/container-server/3.conf b/doc/saio/swift/container-server/3.conf
index 9f340d07e6..73e760af15 100644
--- a/doc/saio/swift/container-server/3.conf
+++ b/doc/saio/swift/container-server/3.conf
@@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port}
[container-auditor]
[container-sync]
+
+[container-sharder]
+auto_shard = true
+rsync_module = {replication_ip}::container{replication_port}
+# This is intentionally much smaller than the default of 1,000,000 so tests
+# can run in a reasonable amount of time
+shard_container_threshold = 100
+# The probe tests make explicit assumptions about the batch sizes
+shard_scanner_batch_size = 10
+cleave_batch_size = 2
diff --git a/doc/saio/swift/container-server/4.conf b/doc/saio/swift/container-server/4.conf
index 5e95e9c57c..c254191b8f 100644
--- a/doc/saio/swift/container-server/4.conf
+++ b/doc/saio/swift/container-server/4.conf
@@ -27,3 +27,13 @@ rsync_module = {replication_ip}::container{replication_port}
[container-auditor]
[container-sync]
+
+[container-sharder]
+auto_shard = true
+rsync_module = {replication_ip}::container{replication_port}
+# This is intentionally much smaller than the default of 1,000,000 so tests
+# can run in a reasonable amount of time
+shard_container_threshold = 100
+# The probe tests make explicit assumptions about the batch sizes
+shard_scanner_batch_size = 10
+cleave_batch_size = 2
diff --git a/doc/saio/swift/internal-client.conf b/doc/saio/swift/internal-client.conf
new file mode 100644
index 0000000000..052d1e7549
--- /dev/null
+++ b/doc/saio/swift/internal-client.conf
@@ -0,0 +1,24 @@
+[DEFAULT]
+
+[pipeline:main]
+pipeline = catch_errors proxy-logging cache symlink proxy-server
+
+[app:proxy-server]
+use = egg:swift#proxy
+account_autocreate = true
+# See proxy-server.conf-sample for options
+
+[filter:symlink]
+use = egg:swift#symlink
+# See proxy-server.conf-sample for options
+
+[filter:cache]
+use = egg:swift#memcache
+# See proxy-server.conf-sample for options
+
+[filter:proxy-logging]
+use = egg:swift#proxy_logging
+
+[filter:catch_errors]
+use = egg:swift#catch_errors
+# See proxy-server.conf-sample for options
diff --git a/doc/source/container.rst b/doc/source/container.rst
index dcff33e3aa..bc95753852 100644
--- a/doc/source/container.rst
+++ b/doc/source/container.rst
@@ -24,6 +24,16 @@ Container Backend
:undoc-members:
:show-inheritance:
+.. _container-replicator:
+
+Container Replicator
+====================
+
+.. automodule:: swift.container.replicator
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
.. _container-server:
Container Server
@@ -44,12 +54,12 @@ Container Reconciler
:undoc-members:
:show-inheritance:
-.. _container-replicator:
+.. _container-sharder:
-Container Replicator
-====================
+Container Sharder
+=================
-.. automodule:: swift.container.replicator
+.. automodule:: swift.container.sharder
:members:
:undoc-members:
:show-inheritance:
diff --git a/doc/source/images/sharded_GET.svg b/doc/source/images/sharded_GET.svg
new file mode 100644
index 0000000000..03c271b5cc
--- /dev/null
+++ b/doc/source/images/sharded_GET.svg
@@ -0,0 +1,2019 @@
+
+
diff --git a/doc/source/images/sharding_GET.svg b/doc/source/images/sharding_GET.svg
new file mode 100644
index 0000000000..5e9240feeb
--- /dev/null
+++ b/doc/source/images/sharding_GET.svg
@@ -0,0 +1,2112 @@
+
+
diff --git a/doc/source/images/sharding_cleave1_load.svg b/doc/source/images/sharding_cleave1_load.svg
new file mode 100644
index 0000000000..4485e3ea09
--- /dev/null
+++ b/doc/source/images/sharding_cleave1_load.svg
@@ -0,0 +1,1694 @@
+
+
diff --git a/doc/source/images/sharding_cleave2_load.svg b/doc/source/images/sharding_cleave2_load.svg
new file mode 100644
index 0000000000..548aab56ab
--- /dev/null
+++ b/doc/source/images/sharding_cleave2_load.svg
@@ -0,0 +1,1754 @@
+
+
diff --git a/doc/source/images/sharding_cleave_basic.svg b/doc/source/images/sharding_cleave_basic.svg
new file mode 100644
index 0000000000..fd5069754f
--- /dev/null
+++ b/doc/source/images/sharding_cleave_basic.svg
@@ -0,0 +1,649 @@
+
+
diff --git a/doc/source/images/sharding_db_states.svg b/doc/source/images/sharding_db_states.svg
new file mode 100644
index 0000000000..6693ef9b3a
--- /dev/null
+++ b/doc/source/images/sharding_db_states.svg
@@ -0,0 +1,1502 @@
+
+
diff --git a/doc/source/images/sharding_scan_basic.svg b/doc/source/images/sharding_scan_basic.svg
new file mode 100644
index 0000000000..54c30f0d8d
--- /dev/null
+++ b/doc/source/images/sharding_scan_basic.svg
@@ -0,0 +1,259 @@
+
+
diff --git a/doc/source/images/sharding_scan_load.svg b/doc/source/images/sharding_scan_load.svg
new file mode 100644
index 0000000000..327ac1a06c
--- /dev/null
+++ b/doc/source/images/sharding_scan_load.svg
@@ -0,0 +1,1665 @@
+
+
diff --git a/doc/source/images/sharding_sharded_load.svg b/doc/source/images/sharding_sharded_load.svg
new file mode 100644
index 0000000000..ae9aacb86c
--- /dev/null
+++ b/doc/source/images/sharding_sharded_load.svg
@@ -0,0 +1,1650 @@
+
+
diff --git a/doc/source/images/sharding_unsharded.svg b/doc/source/images/sharding_unsharded.svg
new file mode 100644
index 0000000000..4241b0de13
--- /dev/null
+++ b/doc/source/images/sharding_unsharded.svg
@@ -0,0 +1,199 @@
+
+
diff --git a/doc/source/images/sharding_unsharded_load.svg b/doc/source/images/sharding_unsharded_load.svg
new file mode 100644
index 0000000000..e613e8cbbd
--- /dev/null
+++ b/doc/source/images/sharding_unsharded_load.svg
@@ -0,0 +1,219 @@
+
+
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 63df790815..b72925c6dd 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -62,6 +62,7 @@ Overview and Concepts
overview_erasure_code
overview_encryption
overview_backing_store
+ overview_container_sharding
ring_background
ring_partpower
associated_projects
diff --git a/doc/source/logs.rst b/doc/source/logs.rst
index f9a8ba2c62..1a5d2656c2 100644
--- a/doc/source/logs.rst
+++ b/doc/source/logs.rst
@@ -105,6 +105,7 @@ RL :ref:`ratelimit`
VW :ref:`versioned_writes`
SSC :ref:`copy`
SYM :ref:`symlink`
+SH :ref:`sharding_doc`
======================= =============================
diff --git a/doc/source/overview_architecture.rst b/doc/source/overview_architecture.rst
index 30b26a471f..b0ae293d9a 100644
--- a/doc/source/overview_architecture.rst
+++ b/doc/source/overview_architecture.rst
@@ -172,6 +172,8 @@ replicator for Replication type policies. See :doc:`overview_erasure_code`
for complete information on both Erasure Code support as well as the
reconstructor.
+.. _architecture_updaters:
+
--------
Updaters
--------
diff --git a/doc/source/overview_container_sharding.rst b/doc/source/overview_container_sharding.rst
new file mode 100644
index 0000000000..110fcc8f87
--- /dev/null
+++ b/doc/source/overview_container_sharding.rst
@@ -0,0 +1,784 @@
+.. _sharding_doc:
+
+==================
+Container Sharding
+==================
+
+Container sharding is an operator-controlled feature that may be used to shard
+very large container databases into a number of smaller shard containers.
+
+.. note::
+
+ Container sharding is currently an experimental feature. It is strongly
+ recommended that operators gain experience of sharding containers in a
+ non-production cluster before using it in production.
+
+ The sharding process involves moving all sharding container database
+ records via the container replication engine; the time taken to complete
+ sharding is dependent upon the existing cluster load and the performance of
+ the container database being sharded.
+
+ There is currently no documented process for reversing the sharding
+ process once sharding has been enabled.
+
+
+----------
+Background
+----------
+The metadata for each container in Swift is stored in an SQLite database. This
+metadata includes: information about the container such as its name,
+modification time and current object count; user metadata that may have been
+written to the container by clients; and a record of every object in the
+container. The
+container database object records are used to generate container listings in
+response to container GET requests; each object record stores the object's
+name, size, hash and content-type as well as associated timestamps.
+
+As the number of objects in a container increases, so does the number of
+object records in the container database. Eventually the container database
+performance starts to degrade and the time taken to update an object record
+increases. This can result in object updates timing out, with a corresponding
+increase in the backlog of pending :ref:`asynchronous updates
+<architecture_updaters>` on object servers. Container databases are typically
+replicated on several nodes and any database performance degradation can also
+result in longer :doc:`container replication <overview_replication>` times.
+
+The point at which container database performance starts to degrade depends
+upon the choice of hardware in the container ring. Anecdotal evidence suggests
+that containers with tens of millions of object records have noticeably
+degraded performance.
+
+This performance degradation can be avoided by ensuring that clients use an
+object naming scheme that disperses objects across a number of containers
+thereby distributing load across a number of container databases. However, that
+is not always desirable nor is it under the control of the cluster operator.
+
+Swift's container sharding feature provides the operator with a mechanism to
+distribute the load on a single client-visible container across multiple,
+hidden, shard containers, each of which stores a subset of the container's
+object records. Clients are unaware of container sharding; clients continue to
+use the same API to access a container that, if sharded, maps to a number of
+shard containers within the Swift cluster.
+
+------------------------
+Deployment and operation
+------------------------
+
+Upgrade Considerations
+----------------------
+
+It is essential that all servers in a Swift cluster have been upgraded to
+support the container sharding feature before attempting to shard a container.
+
+Identifying containers in need of sharding
+------------------------------------------
+
+Container sharding is currently initiated by the ``swift-manage-shard-ranges``
+CLI tool :ref:`described below `. Operators must
+first identify containers that are candidates for sharding. To assist with
+this, the :ref:`sharder_daemon` inspects the size of containers that it visits
+and writes a list of sharding candidates to recon cache. For example::
+
+ "sharding_candidates": {
+ "found": 1,
+ "top": [
+ {
+ "account": "AUTH_test",
+ "container": "c1",
+ "file_size": 497763328,
+ "meta_timestamp": "1525346445.31161",
+ "node_index": 2,
+ "object_count": 3349028,
+ "path": ,
+ "root": "AUTH_test/c1"
+ }
+ ]
+ }
+
+A container is considered to be a sharding candidate if its object count is
+greater than or equal to the ``shard_container_threshold`` option.
+The number of candidates reported is limited to a number configured by the
+``recon_candidates_limit`` option such that only the largest candidate
+containers are included in the ``sharding_candidates`` data.
+
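+For illustration, the candidate data above could be pulled out of the recon
+cache with a short Python sketch such as the following (the cache file path
+assumes the default ``recon_cache_path``; the nesting of keys is not assumed,
+the snippet simply searches for the ``sharding_candidates`` key)::
+
+    import json
+
+    def find_key(obj, key):
+        # recursively search nested dicts and lists for the given key
+        if isinstance(obj, dict):
+            if key in obj:
+                return obj[key]
+            obj = list(obj.values())
+        if isinstance(obj, list):
+            for item in obj:
+                found = find_key(item, key)
+                if found is not None:
+                    return found
+        return None
+
+    with open('/var/cache/swift/container.recon') as fd:
+        candidates = find_key(json.load(fd), 'sharding_candidates') or {}
+
+    for entry in candidates.get('top', []):
+        print('%(account)s/%(container)s: %(object_count)d objects' % entry)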
+
+.. _swift-manage-shard-ranges:
+
+``swift-manage-shard-ranges`` CLI tool
+--------------------------------------
+
+The ``swift-manage-shard-ranges`` tool provides commands for initiating
+sharding of a container. ``swift-manage-shard-ranges`` operates directly on a
+container database file.
+
+.. note::
+
+ ``swift-manage-shard-ranges`` must only be used on one replica of a
+ container database to avoid inconsistent results. The modifications made by
+ ``swift-manage-shard-ranges`` will be automatically copied to other
+ replicas of the container database via normal replication processes.
+
+There are three steps in the process of initiating sharding, each of which may
+be performed in isolation or, as shown below, using a single command.
+
+#. The ``find`` sub-command scans the container database to identify how many
+ shard containers will be required and which objects they will manage. Each
+ shard container manages a range of the object namespace defined by a
+ ``lower`` and ``upper`` bound. The maximum number of objects to be allocated
+ to each shard container is specified on the command line. For example::
+
+ $ swift-manage-shard-ranges find 500000
+ Loaded db broker for AUTH_test/c1.
+ [
+ {
+ "index": 0,
+ "lower": "",
+ "object_count": 500000,
+ "upper": "o_01086834"
+ },
+ {
+ "index": 1,
+ "lower": "o_01086834",
+ "object_count": 500000,
+ "upper": "o_01586834"
+ },
+ {
+ "index": 2,
+ "lower": "o_01586834",
+ "object_count": 500000,
+ "upper": "o_02087570"
+ },
+ {
+ "index": 3,
+ "lower": "o_02087570",
+ "object_count": 500000,
+ "upper": "o_02587572"
+ },
+ {
+ "index": 4,
+ "lower": "o_02587572",
+ "object_count": 500000,
+ "upper": "o_03087572"
+ },
+ {
+ "index": 5,
+ "lower": "o_03087572",
+ "object_count": 500000,
+ "upper": "o_03587572"
+ },
+ {
+ "index": 6,
+ "lower": "o_03587572",
+ "object_count": 349194,
+ "upper": ""
+ }
+ ]
+ Found 7 ranges in 4.37222s (total object count 3349194)
+
+ This command returns a list of shard ranges each of which describes the
+ namespace to be managed by a shard container. No other action is taken by
+ this command and the container database is unchanged. The output may be
+ redirected to a file for subsequent retrieval by the ``replace`` command.
+ For example::
+
+ $ swift-manage-shard-ranges find 500000 > my_shard_ranges
+ Loaded db broker for AUTH_test/c1.
+ Found 7 ranges in 2.448s (total object count 3349194)
+
+#. The ``replace`` sub-command deletes any shard ranges that might already be
+ in the container database and inserts shard ranges from a given file. The
+ file contents should be in the format generated by the ``find`` sub-command.
+ For example::
+
+ $ swift-manage-shard-ranges replace my_shard_ranges
+ Loaded db broker for AUTH_test/c1.
+ No shard ranges found to delete.
+ Injected 7 shard ranges.
+ Run container-replicator to replicate them to other nodes.
+ Use the enable sub-command to enable sharding.
+
+ The container database is modified to store the shard ranges, but the
+ container will not start sharding until sharding is enabled. The ``info``
+ sub-command may be used to inspect the state of the container database at
+ any point, and the ``show`` sub-command may be used to display the inserted
+ shard ranges.
+
+ Shard ranges stored in the container database may be replaced using the
+ ``replace`` sub-command. This will first delete all existing shard ranges
+ before storing new shard ranges. Shard ranges may also be deleted from the
+ container database using the ``delete`` sub-command.
+
+ Shard ranges should not be replaced or deleted using
+ ``swift-manage-shard-ranges`` once the next step of enabling sharding has
+ been taken.
+
+#. The ``enable`` sub-command enables the container for sharding. The sharder
+ daemon and/or container replicator daemon will replicate shard ranges to
+ other replicas of the container db and the sharder daemon will proceed to
+ shard the container. This process may take some time depending on the size
+ of the container, the number of shard ranges and the underlying hardware.
+
+.. note::
+
+ Once the ``enable`` sub-command has been used there is no supported
+ mechanism to revert sharding. Do not use ``swift-manage-shard-ranges`` to
+ make any further changes to the shard ranges in the container db.
+
+ For example::
+
+ $ swift-manage-shard-ranges enable
+ Loaded db broker for AUTH_test/c1.
+ Container moved to state 'sharding' with epoch 1525345093.22908.
+ Run container-sharder on all nodes to shard the container.
+
+ This does not shard the container - sharding is performed by the
+ :ref:`sharder_daemon` - but sets the necessary state in the database for the
+ daemon to subsequently start the sharding process.
+
+ The ``epoch`` value displayed in the output is the time at which sharding
+ was enabled. When the :ref:`sharder_daemon` starts sharding this container
+ it creates a new container database file using the epoch in the filename to
+ distinguish it from the retiring DB that is being sharded.
+
+All three steps may be performed with one sub-command::
+
+ $ swift-manage-shard-ranges find_and_replace 500000 --enable --force
+ Loaded db broker for AUTH_test/c1.
+ No shard ranges found to delete.
+ Injected 7 shard ranges.
+ Run container-replicator to replicate them to other nodes.
+ Container moved to state 'sharding' with epoch 1525345669.46153.
+ Run container-sharder on all nodes to shard the container.
+
+.. _sharder_daemon:
+
+``container-sharder`` daemon
+----------------------------
+
+Once sharding has been enabled for a container, the act of sharding is
+performed by the :ref:`container-sharder`. The :ref:`container-sharder` daemon
+must be running on all container servers. The ``container-sharder`` daemon
+periodically visits each container database to perform any container sharding
+tasks that are required.
+
+The ``container-sharder`` daemon requires a ``[container-sharder]`` config
+section to exist in the container server configuration file; a sample config
+section is shown in the `container-server.conf-sample` file.
+
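+For example, a minimal ``[container-sharder]`` section using the default
+values described in `container-server.conf-sample` might look like::
+
+    [container-sharder]
+    # auto_shard is disabled by default and not recommended in production
+    auto_shard = false
+    shard_container_threshold = 1000000
+    cleave_batch_size = 2
+    interval = 30
+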
+.. note::
+
+ Several of the ``[container-sharder]`` config options are only significant
+ when the ``auto_shard`` option is enabled. This option enables the
+ ``container-sharder`` daemon to automatically identify containers that are
+ candidates for sharding and initiate the sharding process, instead of using
+ the ``swift-manage-shard-ranges`` tool. The ``auto_shard`` option is
+ currently NOT recommended for production systems and should be set to
+ ``false`` (the default value).
+
+The container sharder uses an internal client and therefore requires an
+internal client configuration file to exist. By default the internal-client
+configuration file is expected to be found at
+`/etc/swift/internal-client.conf`. An alternative location for the
+configuration file may be specified using the ``internal_client_conf_path``
+option in the ``[container-sharder]`` config section.
+
+The content of the internal-client configuration file should be the same as the
+`internal-client.conf-sample` file. In particular, the internal-client
+configuration should have::
+
+ account_autocreate = True
+
+in the ``[app:proxy-server]`` section.
+
+A container database may require several visits by the ``container-sharder``
+daemon before it is fully sharded. On each visit the ``container-sharder``
+daemon will move a subset of object records to new shard containers by cleaving
+new shard container databases from the original. By default, two shards are
+processed per visit; this number may be configured by the ``cleave_batch_size``
+option.
+
+The ``container-sharder`` daemon periodically writes progress data for
+containers that are being sharded to recon cache. For example::
+
+ "sharding_in_progress": {
+ "all": [
+ {
+ "account": "AUTH_test",
+ "active": 0,
+ "cleaved": 2,
+ "container": "c1",
+ "created": 5,
+ "db_state": "sharding",
+ "error": null,
+ "file_size": 26624,
+ "found": 0,
+ "meta_timestamp": "1525349617.46235",
+ "node_index": 1,
+ "object_count": 3349030,
+ "path": ,
+ "root": "AUTH_test/c1",
+ "state": "sharding"
+ }
+ ]
+ }
+
+This example indicates that from a total of 7 shard ranges, 2 have been cleaved
+whereas 5 remain in created state waiting to be cleaved.
+
+Shard containers are created in an internal account and not visible to clients.
+By default, shard containers for an account ``AUTH_test`` are created in the
+internal account ``.shards_AUTH_test``.
+
+Once a container has started sharding, object updates to that container may be
+redirected to the shard container. The ``container-sharder`` daemon is also
+responsible for sending updates of a shard's object count and bytes_used to the
+original container so that aggregate object count and bytes used values can be
+returned in responses to client requests.
+
+.. note::
+
+ The ``container-sharder`` daemon must continue to run on all container
+ servers in order for shard object stats updates to be generated.
+
+
+--------------
+Under the hood
+--------------
+
+Terminology
+-----------
+
+================== ==================================================
+Name Description
+================== ==================================================
+Root container The original container that lives in the
+ user's account. It holds references to its
+ shard containers.
+Retiring DB The original database file that is to be sharded.
+Fresh DB A database file that will replace the retiring
+ database.
+Shard range A range of the object namespace defined by a lower
+ bound and an upper bound.
+Shard container A container that holds object records for a shard
+ range. Shard containers exist in a hidden account
+ mirroring the user's account.
+Misplaced objects Items that don't belong in a container's shard
+ range. These will be moved to their correct
+ location by the container-sharder.
+Cleaving The act of moving object records within a shard
+ range to a shard container database.
+Shrinking The act of merging a small shard container into
+ another shard container in order to delete the
+ small shard container.
+Donor The shard range that is shrinking away.
+Acceptor The shard range into which a donor is merged.
+================== ==================================================
+
+
+Finding shard ranges
+--------------------
+
+The end goal of sharding a container is to replace the original container
+database which has grown very large with a number of shard container databases,
+each of which is responsible for storing a range of the entire object
+namespace. The first step towards achieving this is to identify an appropriate
+set of contiguous object namespaces, known as shard ranges, each of which
+contains a similarly sized portion of the container's current object content.
+
+Shard ranges cannot simply be selected by sharding the namespace uniformly,
+because object names are not guaranteed to be distributed uniformly. If the
+container were naively sharded into two shard ranges, one containing all
+object names up to `m` and the other containing all object names beyond `m`,
+then if all object names actually start with `o` the outcome would be an
+extremely unbalanced pair of shard containers.
+
+It is also too simplistic to assume that every container that requires sharding
+can be sharded into two. This might be the goal in the ideal world, but in
+practice there will be containers that have grown very large and should be
+sharded into many shards. Furthermore, the time required to find the exact
+mid-point of the existing object names in a large SQLite database would
+increase with container size.
+
+For these reasons, shard ranges of size `N` are found by searching for the
+`Nth` object in the database table, sorted by object name, and then searching
+for the `(2 * N)th` object, and so on until all objects have been searched. For
+a container that has exactly `2N` objects, the end result is the same as
+sharding the container at the midpoint of its object names. In practice
+sharding would typically be enabled for containers with greater than `2N`
+objects and more than two shard ranges will be found, the last one probably
+containing fewer than `N` objects. With containers having large multiples of
+`N` objects, shard ranges can be identified in batches, which enables a more
+scalable solution.
+
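+The principle can be sketched in a few lines of plain Python; this is an
+illustration only (the sharder actually works against the container's SQLite
+object table rather than an in-memory list)::
+
+    def find_split_points(sorted_names, rows_per_shard):
+        """Return the upper bounds of shard ranges of roughly equal size."""
+        bounds = []
+        index = rows_per_shard - 1
+        while index < len(sorted_names) - 1:
+            bounds.append(sorted_names[index])
+            index += rows_per_shard
+        # the final shard range is always unbounded above (empty upper bound)
+        bounds.append('')
+        return bounds
+
+    names = ['o_%07d' % i for i in range(10)]
+    print(find_split_points(names, 4))  # ['o_0000003', 'o_0000007', '']
+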
+To illustrate this process, consider a very large container in a user account
+``acct`` that is a candidate for sharding:
+
+.. image:: images/sharding_unsharded.svg
+
+The :ref:`swift-manage-shard-ranges` tool ``find`` sub-command searches the
+object table for the `Nth` object whose name will become the upper bound of the
+first shard range, and the lower bound of the second shard range. The lower
+bound of the first shard range is the empty string.
+
+For the purposes of this example the first upper bound is `cat`:
+
+.. image:: images/sharding_scan_basic.svg
+
+:ref:`swift-manage-shard-ranges` continues to search the container to find
+further shard ranges, with the final upper bound also being the empty string.
+
+Enabling sharding
+-----------------
+
+Once shard ranges have been found the :ref:`swift-manage-shard-ranges`
+``replace`` sub-command is used to insert them into the `shard_ranges` table
+of the container database. In addition to its lower and upper bounds, each
+shard range is given a name. The name takes the form ``a/c`` where ``a`` is an
+account name formed by prefixing the user account with the string
+``.shards_``, and ``c`` is a container name that is derived from the original
+container and includes the index of the shard range. The final container name
+for the shard range uses the pattern of ``{original container name}-{hash of
+parent container}-{timestamp}-{shard index}``.
+
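+For example (with purely illustrative hash and timestamp values), the first
+shard range found for a container ``c1`` in account ``AUTH_test`` might be
+given a name such as::
+
+    .shards_AUTH_test/c1-0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d-1525346622.91000-0
+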
+The ``enable`` sub-command then creates some final state required to initiate
+sharding the container, including a special shard range record referred to as
+the container's `own_shard_range` whose name is equal to the container's path.
+This is used to keep a record of the object namespace that the container
+covers, which for user containers is always the entire namespace.
+
+The :class:`~swift.common.utils.ShardRange` class
+-------------------------------------------------
+
+The :class:`~swift.common.utils.ShardRange` class provides methods for
+interacting with the attributes and state of a shard range. The class
+encapsulates the following properties:
+
+* The name of the shard range which is also the name of the shard container
+ used to hold object records in its namespace.
+* Lower and upper bounds which define the object namespace of the shard range.
+* A deleted flag.
+* A timestamp at which the bounds and deleted flag were last modified.
+* The object stats for the shard range i.e. object count and bytes used.
+* A timestamp at which the object stats were last modified.
+* The state of the shard range, and an epoch, which is the timestamp used in
+ the shard container's database file name.
+* A timestamp at which the state and epoch were last modified.
+
+A shard range progresses through the following states:
+
+* FOUND: the shard range has been identified in the container that is to be
+ sharded but no resources have been created for it.
+* CREATED: A shard container has been created to store the contents of the
+ shard range.
+* CLEAVED: the sharding container's contents for the shard range have been
+ copied to the shard container from *at least one replica* of the sharding
+ container.
+* ACTIVE: shard ranges move to this state when all shard ranges in a sharding
+ container have been cleaved.
+* SHRINKING: the shard range has been enabled for shrinking; or
+* SHARDING: the shard range has been enabled for sharding.
+* SHARDED: the shard range has completed sharding or shrinking.
+
+.. note::
+
+ Shard range state represents the most advanced state of the shard range on
+ any replica of the container. For example, a shard range in CLEAVED state
+ may not have completed cleaving on all replicas but has cleaved on at least
+ one replica.
+
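+As a rough sketch of how the class might be used (the constructor arguments
+shown here reflect the class as added by this feature and may change; the
+name and bounds are illustrative)::
+
+    from swift.common.utils import ShardRange, Timestamp
+
+    # a shard range covering object names in ('cat', 'giraffe']; its name is
+    # the path of the shard container that will hold its object records
+    sr = ShardRange('.shards_AUTH_test/c1-sh-1', Timestamp.now(),
+                    lower='cat', upper='giraffe')
+    # e.g. "cat giraffe found" - a new range starts in the FOUND state
+    print('%s %s %s' % (sr.lower, sr.upper, sr.state_text))
+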
+Fresh and retiring database files
+---------------------------------
+
+As alluded to earlier, writing to a large container causes increased latency
+for the container servers. Once sharding has been initiated on a container it
+is desirable to stop writing to the large database; ultimately it will be
+unlinked. This is primarily achieved by redirecting object updates to new shard
+containers as they are created (see :ref:`redirecting_updates` below), but some
+object updates may still need to be accepted by the root container and other
+container metadata must still be modifiable.
+
+To render the large `retiring` database effectively read-only, when the
+:ref:`sharder_daemon` finds a container with a set of shard range records,
+including an `own_shard_range`, it first creates a fresh database file which
+will ultimately replace the existing `retiring` database. For a retiring db
+whose filename is::
+
+ <hash>.db
+
+the fresh database file name is of the form::
+
+ <hash>_<epoch>.db
+
+where epoch is a timestamp stored in the container's `own_shard_range`.
+
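+For example, using the epoch from the ``enable`` output shown earlier and an
+illustrative hash, a retiring DB named::
+
+    dd71e1aa5b66c62f74ff1a98d2d5dcb7.db
+
+would be accompanied by a fresh DB named::
+
+    dd71e1aa5b66c62f74ff1a98d2d5dcb7_1525345093.22908.db
+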
+The fresh DB has a copy of the shard ranges table from the retiring DB and all
+other container metadata apart from the object records. Once a fresh DB file
+has been created it is used to store any new object updates and no more object
+records are written to the retiring DB file.
+
+Once the sharding process has completed, the retiring DB file will be unlinked
+leaving only the fresh DB file in the container's directory. There are
+therefore three states that the container DB directory may be in during the
+sharding process: UNSHARDED, SHARDING and SHARDED.
+
+.. image:: images/sharding_db_states.svg
+
+If the container ever shrinks to the point that it has no shards then the fresh
+DB starts to store object records, behaving the same as an unsharded container.
+This is known as the COLLAPSED state.
+
+In summary, the DB states that any container replica may be in are:
+
+- UNSHARDED - In this state there is just one standard container database. All
+ containers are originally in this state.
+- SHARDING - There are now two databases, the retiring database and a fresh
+ database. The fresh database stores any metadata, container level stats,
+ an object holding table, and a table that stores shard ranges.
+- SHARDED - There is only one database, the fresh database, which has one or
+ more shard ranges in addition to its own shard range. The retiring database
+ has been unlinked.
+- COLLAPSED - There is only one database, the fresh database, which has only
+ its own shard range and stores object records.
+
+.. note::
+
+ DB state is unique to each replica of a container and is not necessarily
+ synchronised with shard range state.
+
+
+Creating shard containers
+-------------------------
+
+The :ref:`sharder_daemon` next creates a shard container for each shard range
+using the shard range name as the name of the shard container:
+
+.. image:: /images/sharding_cleave_basic.svg
+
+Shard containers now exist with a unique name and are placed in a hidden account
+that maps to the user account (`.shards_acct`). This avoids namespace
+collisions and also keeps all the shard containers out of view from users of
+the account. Each shard container has an `own_shard_range` record which has the
+lower and upper bounds of the object namespace for which it is responsible, and
+a reference to the sharding user container, which is referred to as the
+`root_container`. Unlike the `root_container`, the shard container's
+`own_shard_range` does not cover the entire namespace.
+
+Cleaving shard containers
+-------------------------
+
+Having created empty shard containers the sharder daemon will proceed to cleave
+objects from the retiring database to each shard range. Cleaving occurs in
+batches of two (by default) shard ranges, so if a container has more than two
+shard ranges then the daemon must visit it multiple times to complete cleaving.
+
+To cleave a shard range the daemon creates a shard database for the shard
+container on a local device. This device may be one of the shard container's
+primary nodes but often it will not be. Object records from the corresponding
+shard range namespace are then copied from the retiring DB to this shard DB.
+
+Swift's container replication mechanism is then used to replicate the shard DB
+to its primary nodes. Checks are made to ensure that the new shard container DB
+has been replicated to a sufficient number of its primary nodes before it is
+considered to have been successfully cleaved. By default the daemon requires
+successful replication of a new shard broker to at least a quorum of the
+container rings replica count, but this requirement can be tuned using the
+``shard_replication_quorum`` option.
+
+Once a shard range has been successfully cleaved from a retiring database the
+daemon transitions its state to ``CLEAVED``. It should be noted that this state
+transition occurs as soon as any one of the retiring DB replicas has cleaved
+the shard range, and therefore does not imply that all retiring DB replicas
+have cleaved that range. The significance of the state transition is that the
+shard container is now considered suitable for contributing to object listings,
+since its contents are present on a quorum of its primary nodes and are the
+same as at least one of the retiring DBs for that namespace.
+
+Once a shard range is in the ``CLEAVED`` state, the requirement for
+'successful' cleaving of other instances of the retiring DB may optionally be
+relaxed since it is not so imperative that their contents are replicated
+*immediately* to their primary nodes. The ``existing_shard_replication_quorum``
+option can be used to reduce the quorum required for a cleaved shard range to
+be considered successfully replicated by the sharder daemon.
+
+.. note::
+
+ Once cleaved, shard container DBs will continue to be replicated by the
+ normal `container-replicator` daemon so that they will eventually be fully
+ replicated to all primary nodes regardless of any replication quorum options
+ used by the sharder daemon.
+
+The cleaving progress of each replica of a retiring DB must be
+tracked independently of the shard range state. This is done using a per-DB
+CleavingContext object that maintains a cleaving cursor for the retiring DB
+that it is associated with. The cleaving cursor is simply the upper bound of
+the last shard range to have been cleaved *from that particular retiring DB*.
+
+Each CleavingContext is stored in the sharding container's sysmeta under a key
+that is the ``id`` of the retiring DB. Since all container DB files have unique
+``id``s, this guarantees that each retiring DB will have a unique
+CleavingContext. Furthermore, if the retiring DB file is changed, for example
+by an rsync_then_merge replication operation which might change the contents of
+the DB's object table, then it will get a new unique CleavingContext.
+
+A CleavingContext maintains other state that is used to ensure that a retiring
+DB is only considered to be fully cleaved, and ready to be deleted, if *all* of
+its object rows have been cleaved to a shard range.
+
+Once all shard ranges have been cleaved from the retiring DB it is deleted. The
+container is now represented by the fresh DB which has a table of shard range
+records that point to the shard containers that store the container's object
+records.
+
+.. _redirecting_updates:
+
+Redirecting object updates
+--------------------------
+
+Once a shard container exists, object updates arising from new client requests
+and async pending files are directed to the shard container instead of the root
+container. This takes load off the root container.
+
+For a sharded (or partially sharded) container, when the proxy receives a new
+object request it issues a GET request to the container for data describing a
+shard container to which the object update should be sent. The proxy then
+annotates the object request with the shard container location so that the
+object server will forward object updates to the shard container. If those
+updates fail then the async pending file that is written on the object server
+contains the shard container location.
+
+When the object updater processes async pending files for previously failed
+object updates, it may not find a shard container location. In this case the
+updater sends the update to the `root container`, which returns a redirection
+response with the shard container location.
+
+.. note::
+
+ Object updates are directed to shard containers as soon as they exist, even
+ if the retiring DB object records have not yet been cleaved to the shard
+ container. This prevents further writes to the retiring DB and also avoids
+ the fresh DB being polluted by new object updates. The goal is to
+ ultimately have all object records in the shard containers and none in the
+ root container.
+
+Building container listings
+---------------------------
+
+Listing requests for a sharded container are handled by querying the shard
+containers for components of the listing. The proxy forwards the client listing
+request to the root container, as it would for an unsharded container, but the
+container server responds with a list of shard ranges rather than objects. The
+proxy then queries each shard container in namespace order for their listing,
+until either the listing length limit is reached or all shard ranges have been
+listed.
+
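+A much simplified sketch of that merging logic (not the proxy's actual code)
+might look like::
+
+    def build_listing(shard_ranges, get_shard_listing, limit):
+        """Concatenate per-shard listings in namespace order up to limit."""
+        listing = []
+        for shard_range in shard_ranges:  # already sorted by lower bound
+            if len(listing) >= limit:
+                break
+            listing.extend(get_shard_listing(shard_range,
+                                             limit - len(listing)))
+        return listing
+
+Here ``get_shard_listing`` stands in for a GET to the shard container with an
+appropriate marker and limit.
+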
+While a container is still in the process of sharding, only *cleaved* shard
+ranges are used when building a container listing. Shard ranges that have not
+yet been cleaved will not have any object records from the root container. The
+root
+container continues to provide listings for the uncleaved part of its
+namespace.
+
+.. note::
+
+ New object updates are redirected to shard containers that have not yet been
+ cleaved. These updates will therefore not be included in container listings
+ until their shard range has been cleaved.
+
+Example request redirection
+---------------------------
+
+As an example, consider a sharding container in which 3 shard ranges have been
+found ending in cat, giraffe and igloo. Their respective shard containers have
+been created so update requests for objects up to "igloo" are redirected to the
+appropriate shard container. The root DB continues to handle listing requests
+and update requests for any object name beyond "igloo".
+
+.. image:: images/sharding_scan_load.svg
+
+The sharder daemon cleaves objects from the retiring DB to the shard range DBs;
+it also moves any misplaced objects from the root container's fresh DB to the
+shard DB. Cleaving progress is represented by the blue line. Once the first
+shard range has been cleaved, listing requests for that namespace are directed
+to the shard container. The root container still provides listings for the
+remainder of the namespace.
+
+.. image:: images/sharding_cleave1_load.svg
+
+The process continues: the sharder cleaves the next range and a new range is
+found with upper bound of "linux". Now the root container only needs to handle
+listing requests up to "giraffe" and update requests for objects whose name is
+greater than "linux". Load will continue to diminish on the root DB and be
+dispersed across the shard DBs.
+
+.. image:: images/sharding_cleave2_load.svg
+
+
+Container replication
+---------------------
+
+Shard range records are replicated between container DB replicas in much the
+same way as object records are for unsharded containers. However, the usual
+replication of object records between replicas of a container is halted as soon
+as a container is capable of being sharded. Instead, object records are moved
+to their new locations in shard containers. This avoids unnecessary replication
+traffic between container replicas.
+
+To facilitate this, shard ranges are both 'pushed' and 'pulled' during
+replication, prior to any attempt to replicate objects. This means that the
+node initiating replication learns about shard ranges from the destination node
+early during the replication process and is able to skip object replication if
+it discovers that it has shard ranges and is able to shard.
+
+.. note::
+
+ When the destination DB for container replication is missing then the
+ 'complete_rsync' replication mechanism is still used and in this case
+ both object records and shard range records are copied to the destination
+ node.
+
+Container deletion
+------------------
+
+Sharded containers may be deleted by a ``DELETE`` request just like an
+unsharded container. A sharded container must be empty before it can be deleted
+which implies that all of its shard containers must have reported that they are
+empty.
+
+Shard containers are *not* immediately deleted when their root container is
+deleted; the shard containers remain undeleted so that they are able to
+continue to receive object updates that might arrive after the root container
+has been deleted. Shard containers continue to update their deleted root
+container with their object stats. If a shard container does receive object
+updates that cause it to no longer be empty then the root container will no
+longer be considered deleted once that shard container sends an object stats
+update.
+
+
+Sharding a shard container
+--------------------------
+
+A shard container may grow to a size that requires it to be sharded.
+``swift-manage-shard-ranges`` may be used to identify shard ranges within a
+shard container and enable sharding in the same way as for a root container.
+When a shard is sharding it notifies the root of its shard ranges so that the
+root can start to redirect object updates to the new 'sub-shards'. When the
+shard has completed sharding the root is aware of all the new sub-shards and
+the sharding shard deletes its shard range record in the root container shard
+ranges table. At this point the root is aware of all the new sub-shards which
+collectively cover the namespace of the now-deleted shard.
+
+There is no hierarchy of shards beyond the root and its immediate shards. When
+a shard shards, its sub-shards are effectively re-parented with the root
+container.
+
+
+Shrinking a shard container
+---------------------------
+
+A shard's contents may reduce to a point where the shard is no longer required.
+If this happens then the shard may be shrunk into another shard range.
+Shrinking is achieved in a similar way to sharding: an 'acceptor' shard range
+is written to the shrinking shard container's shard ranges table; unlike
+sharding, where shard ranges each cover a subset of the sharding container's
+namespace, the acceptor shard range is a superset of the shrinking shard range.
+
+Once given an acceptor shard range the shrinking shard will cleave itself to
+its acceptor, and then delete itself from the root container shard ranges
+table.
diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample
index 4059e39418..7d38deb0c5 100644
--- a/etc/container-server.conf-sample
+++ b/etc/container-server.conf-sample
@@ -69,6 +69,10 @@ bind_port = 6201
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
+#
+# The prefix used for hidden auto-created accounts, for example accounts in
+# which shard containers are created. Defaults to '.'.
+# auto_create_account_prefix = .
[pipeline:main]
pipeline = healthcheck recon container-server
@@ -323,3 +327,117 @@ use = egg:swift#xprofile
#
# unwind the iterator of applications
# unwind = false
+
+[container-sharder]
+# You can override the default log routing for this app here (don't use set!):
+# log_name = container-sharder
+# log_facility = LOG_LOCAL0
+# log_level = INFO
+# log_address = /dev/log
+#
+# Container sharder specific settings
+#
+# If the auto_shard option is true then the sharder will automatically select
+# containers to shard, scan for shard ranges, and select shards to shrink.
+# The default is false.
+# Warning: auto-sharding is still under development and should not be used in
+# production; do not set this option to true in a production cluster.
+# auto_shard = false
+#
+# When auto-sharding is enabled shard_container_threshold defines the object
+# count at which a container with container-sharding enabled will start to
+# shard. shard_container_threshold also indirectly determines the initial
+# nominal size of shard containers, which is shard_container_threshold // 2, as
+# well as determining the thresholds for shrinking and merging shard
+# containers.
+# shard_container_threshold = 1000000
+#
+# When auto-sharding is enabled shard_shrink_point defines the object count
+# below which a 'donor' shard container will be considered for shrinking into
+# another 'acceptor' shard container. shard_shrink_point is a percentage of
+# shard_container_threshold e.g. the default value of 5 means 5% of the
+# shard_container_threshold.
+# shard_shrink_point = 5
+#
+# When auto-sharding is enabled shard_shrink_merge_point defines the maximum
+# allowed size of an acceptor shard container after having a donor merged into
+# it. Shard_shrink_merge_point is a percentage of shard_container_threshold.
+# e.g. the default value of 75 means that the projected sum of a donor object
+# count and acceptor count must be less than 75% of shard_container_threshold
+# for the donor to be allowed to merge into the acceptor.
+#
+# For example, if the shard_container_threshold is 1 million,
+# shard_shrink_point is 5, and shard_shrink_merge_point is 75 then a shard will
+# be considered for shrinking if it has less than or equal to 50 thousand
+# objects but will only merge into an acceptor if the combined object count
+# would be less than or equal to 750 thousand objects.
+# shard_shrink_merge_point = 75
+#
+# When auto-sharding is enabled shard_scanner_batch_size defines the maximum
+# number of shard ranges that will be found each time the sharder daemon visits
+# a sharding container. If necessary the sharder daemon will continue to search
+# for more shard ranges each time it visits the container.
+# shard_scanner_batch_size = 10
+#
+# cleave_batch_size defines the number of shard ranges that will be cleaved
+# each time the sharder daemon visits a sharding container.
+# cleave_batch_size = 2
+#
+# cleave_row_batch_size defines the size of batches of object rows read from a
+# sharding container and merged to a shard container during cleaving.
+# cleave_row_batch_size = 10000
+#
+# Defines the number of successfully replicated shard dbs required when
+# cleaving a previously uncleaved shard range before the sharder will progress
+# to the next shard range. The value should be less than or equal to the
+# container ring replica count. The default of 'auto' causes the container ring
+# quorum value to be used. This option only applies to the container-sharder
+# replication and does not affect the number of shard container replicas that
+# will eventually be replicated by the container-replicator.
+# shard_replication_quorum = auto
+#
+# Defines the number of successfully replicated shard dbs required when
+# cleaving a shard range that has been previously cleaved on another node
+# before the sharder will progress to the next shard range. The value should be
+# less than or equal to the container ring replica count. The default of 'auto'
+# causes the shard_replication_quorum value to be used. This option only
+# applies to the container-sharder replication and does not affect the number
+# of shard container replicas that will eventually be replicated by the
+# container-replicator.
+# existing_shard_replication_quorum = auto
+#
+# The sharder uses an internal client to create and make requests to
+# containers. The absolute path to the client config file can be configured.
+# internal_client_conf_path = /etc/swift/internal-client.conf
+#
+# The number of times the internal client will retry requests.
+# request_tries = 3
+#
+# Each time the sharder dumps stats to the recon cache file it includes a list
+# of containers that appear to need sharding but are not yet sharding. By
+# default this list is limited to the top 5 containers, ordered by object
+# count. The limit may be changed by setting recon_candidates_limit to an
+# integer value. A negative value implies no limit.
+# recon_candidates_limit = 5
+#
+# Large databases tend to take a while to work with, but we want to make sure
+# we write down our progress. Use a larger-than-normal broker timeout to make
+# us less likely to bomb out on a LockTimeout.
+# broker_timeout = 60
+#
+# Time in seconds to wait between sharder cycles
+# interval = 30
+#
+# The container-sharder accepts the following configuration options as defined
+# in the container-replicator section:
+#
+# per_diff = 1000
+# max_diffs = 100
+# concurrency = 8
+# node_timeout = 10
+# conn_timeout = 0.5
+# reclaim_age = 604800
+# rsync_compress = no
+# rsync_module = {replication_ip}::container
+# recon_cache_path = /var/cache/swift
+#
diff --git a/setup.cfg b/setup.cfg
index 7ed7f1ec17..bc6b1a07c0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,7 @@ scripts =
bin/swift-container-info
bin/swift-container-replicator
bin/swift-container-server
+ bin/swift-container-sharder
bin/swift-container-sync
bin/swift-container-updater
bin/swift-container-reconciler
@@ -71,6 +72,9 @@ keystone =
keystonemiddleware>=4.17.0
[entry_points]
+console_scripts =
+ swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main
+
paste.app_factory =
proxy = swift.proxy.server:app_factory
object = swift.obj.server:app_factory
diff --git a/swift/account/backend.py b/swift/account/backend.py
index 2734548cf0..1ff940d4f6 100644
--- a/swift/account/backend.py
+++ b/swift/account/backend.py
@@ -22,7 +22,7 @@ import six.moves.cPickle as pickle
import sqlite3
from swift.common.utils import Timestamp
-from swift.common.db import DatabaseBroker, utf8encode
+from swift.common.db import DatabaseBroker, utf8encode, zero_like
DATADIR = 'accounts'
@@ -233,7 +233,7 @@ class AccountBroker(DatabaseBroker):
with self.get() as conn:
row = conn.execute(
'SELECT container_count from account_stat').fetchone()
- return (row[0] == 0)
+ return zero_like(row[0])
def make_tuple_for_pickle(self, record):
return (record['name'], record['put_timestamp'],
@@ -254,7 +254,7 @@ class AccountBroker(DatabaseBroker):
:param storage_policy_index: the storage policy for this container
"""
if Timestamp(delete_timestamp) > Timestamp(put_timestamp) and \
- object_count in (None, '', 0, '0'):
+ zero_like(object_count):
deleted = 1
else:
deleted = 0
@@ -273,8 +273,7 @@ class AccountBroker(DatabaseBroker):
:returns: True if the DB is considered to be deleted, False otherwise
"""
- return status == 'DELETED' or (
- container_count in (None, '', 0, '0') and
+ return status == 'DELETED' or zero_like(container_count) and (
Timestamp(delete_timestamp) > Timestamp(put_timestamp))
def _is_deleted(self, conn):
@@ -509,7 +508,7 @@ class AccountBroker(DatabaseBroker):
record[2] = row[2]
# If deleted, mark as such
if Timestamp(record[2]) > Timestamp(record[1]) and \
- record[3] in (None, '', 0, '0'):
+ zero_like(record[3]):
record[5] = 1
else:
record[5] = 0
diff --git a/swift/cli/info.py b/swift/cli/info.py
index 0eee781ba6..1969435285 100644
--- a/swift/cli/info.py
+++ b/swift/cli/info.py
@@ -298,6 +298,27 @@ def print_db_info_metadata(db_type, info, metadata, drop_prefixes=False):
else:
print('No user metadata found in db file')
+ if db_type == 'container':
+ print('Sharding Metadata:')
+ shard_type = 'root' if info['is_root'] else 'shard'
+ print(' Type: %s' % shard_type)
+ print(' State: %s' % info['db_state'])
+ if info.get('shard_ranges'):
+ print('Shard Ranges (%d):' % len(info['shard_ranges']))
+ for srange in info['shard_ranges']:
+ srange = dict(srange, state_text=srange.state_text)
+ print(' Name: %(name)s' % srange)
+ print(' lower: %(lower)r, upper: %(upper)r' % srange)
+ print(' Object Count: %(object_count)d, Bytes Used: '
+ '%(bytes_used)d, State: %(state_text)s (%(state)d)'
+ % srange)
+ print(' Created at: %s (%s)'
+ % (Timestamp(srange['timestamp']).isoformat,
+ srange['timestamp']))
+ print(' Meta Timestamp: %s (%s)'
+ % (Timestamp(srange['meta_timestamp']).isoformat,
+ srange['meta_timestamp']))
+
def print_obj_metadata(metadata, drop_prefixes=False):
"""
@@ -406,7 +427,13 @@ def print_info(db_type, db_file, swift_dir='/etc/swift', stale_reads_ok=False,
raise InfoSystemExit()
raise
account = info['account']
- container = info['container'] if db_type == 'container' else None
+ container = None
+ if db_type == 'container':
+ container = info['container']
+ info['is_root'] = broker.is_root_container()
+ sranges = broker.get_shard_ranges()
+ if sranges:
+ info['shard_ranges'] = sranges
print_db_info_metadata(db_type, info, broker.metadata, drop_prefixes)
try:
ring = Ring(swift_dir, ring_name=db_type)
diff --git a/swift/cli/manage_shard_ranges.py b/swift/cli/manage_shard_ranges.py
new file mode 100644
index 0000000000..acbc364968
--- /dev/null
+++ b/swift/cli/manage_shard_ranges.py
@@ -0,0 +1,370 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy
+# of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+import argparse
+import json
+import sys
+import time
+
+from six.moves import input
+
+from swift.common.utils import Timestamp, get_logger, ShardRange
+from swift.container.backend import ContainerBroker, UNSHARDED
+from swift.container.sharder import make_shard_ranges, sharding_enabled, \
+ CleavingContext
+
+
+def _load_and_validate_shard_data(args):
+ try:
+ with open(args.input, 'rb') as fd:
+ try:
+ data = json.load(fd)
+ if not isinstance(data, list):
+ raise ValueError('Shard data must be a list of dicts')
+ for k in ('lower', 'upper', 'index', 'object_count'):
+ for shard in data:
+ shard[k]
+ return data
+ except (TypeError, ValueError, KeyError) as err:
+ print('Failed to load valid shard range data: %r' % err,
+ file=sys.stderr)
+ exit(2)
+ except IOError as err:
+ print('Failed to open file %s: %s' % (args.input, err),
+ file=sys.stderr)
+ exit(2)
+
+
+def _check_shard_ranges(own_shard_range, shard_ranges):
+ reasons = []
+
+ def reason(x, y):
+ if x != y:
+ reasons.append('%s != %s' % (x, y))
+
+ if not shard_ranges:
+ reasons.append('No shard ranges.')
+ else:
+ reason(own_shard_range.lower, shard_ranges[0].lower)
+ reason(own_shard_range.upper, shard_ranges[-1].upper)
+ for x, y in zip(shard_ranges, shard_ranges[1:]):
+ reason(x.upper, y.lower)
+
+ if reasons:
+ print('WARNING: invalid shard ranges: %s.' % reasons)
+ print('Aborting.')
+ exit(2)
+
+
+def _check_own_shard_range(broker, args):
+ # TODO: this check is weak - if the shards prefix changes then we may not
+ # identify a shard container. The goal is to not inadvertently create an
+ # entire namespace default shard range for a shard container.
+ is_shard = broker.account.startswith(args.shards_account_prefix)
+ own_shard_range = broker.get_own_shard_range(no_default=is_shard)
+ if not own_shard_range:
+ print('WARNING: shard container missing own shard range.')
+ print('Aborting.')
+ exit(2)
+ return own_shard_range
+
+
+def _find_ranges(broker, args, status_file=None):
+ start = last_report = time.time()
+ limit = 5 if status_file else -1
+ shard_data, last_found = broker.find_shard_ranges(
+ args.rows_per_shard, limit=limit)
+ if shard_data:
+ while not last_found:
+ if last_report + 10 < time.time():
+ print('Found %d ranges in %gs; looking for more...' % (
+ len(shard_data), time.time() - start), file=status_file)
+ last_report = time.time()
+ # prefix doesn't matter since we aren't persisting it
+ found_ranges = make_shard_ranges(broker, shard_data, '.shards_')
+ more_shard_data, last_found = broker.find_shard_ranges(
+ args.rows_per_shard, existing_ranges=found_ranges, limit=5)
+ shard_data.extend(more_shard_data)
+ return shard_data, time.time() - start
+
+
+def find_ranges(broker, args):
+ shard_data, delta_t = _find_ranges(broker, args, sys.stderr)
+ print(json.dumps(shard_data, sort_keys=True, indent=2))
+ print('Found %d ranges in %gs (total object count %s)' %
+ (len(shard_data), delta_t,
+ sum(r['object_count'] for r in shard_data)),
+ file=sys.stderr)
+ return 0
+
+
+def show_shard_ranges(broker, args):
+ shard_ranges = broker.get_shard_ranges(
+ include_deleted=getattr(args, 'include_deleted', False))
+ shard_data = [dict(sr, state=sr.state_text)
+ for sr in shard_ranges]
+
+ if not shard_data:
+ print("No shard data found.", file=sys.stderr)
+ elif getattr(args, 'brief', False):
+ print("Existing shard ranges:", file=sys.stderr)
+ print(json.dumps([(sd['lower'], sd['upper']) for sd in shard_data],
+ sort_keys=True, indent=2))
+ else:
+ print("Existing shard ranges:", file=sys.stderr)
+ print(json.dumps(shard_data, sort_keys=True, indent=2))
+ return 0
+
+
+def db_info(broker, args):
+ print('Sharding enabled = %s' % sharding_enabled(broker))
+ own_sr = broker.get_own_shard_range(no_default=True)
+ print('Own shard range: %s' %
+ (json.dumps(dict(own_sr, state=own_sr.state_text),
+ sort_keys=True, indent=2)
+ if own_sr else None))
+ db_state = broker.get_db_state()
+ print('db_state = %s' % db_state)
+ if db_state == 'sharding':
+ print('Retiring db id: %s' % broker.get_brokers()[0].get_info()['id'])
+ print('Cleaving context: %s' %
+ json.dumps(dict(CleavingContext.load(broker)),
+ sort_keys=True, indent=2))
+ print('Metadata:')
+ for k, (v, t) in broker.metadata.items():
+ print(' %s = %s' % (k, v))
+
+
+def delete_shard_ranges(broker, args):
+ shard_ranges = broker.get_shard_ranges()
+ if not shard_ranges:
+ print("No shard ranges found to delete.")
+ return 0
+
+ while not args.force:
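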
+ print('This will delete the %d existing shard ranges.' % len(shard_ranges))
+ if broker.get_db_state() != UNSHARDED:
+ print('WARNING: Be very cautious about deleting existing shard '
+ 'ranges. Deleting all ranges in this db does not guarantee '
+ 'deletion of all ranges on all replicas of the db.')
+ print(' - this db is in state %s' % broker.get_db_state())
+ print(' - %d existing shard ranges have started sharding' %
+ [sr.state != ShardRange.FOUND
+ for sr in shard_ranges].count(True))
+ choice = input('Do you want to show the existing ranges [s], '
+ 'delete the existing ranges [yes] '
+ 'or quit without deleting [q]? ')
+ if choice == 's':
+ show_shard_ranges(broker, args)
+ continue
+ elif choice == 'q':
+ return 1
+ elif choice == 'yes':
+ break
+ else:
+ print('Please make a valid choice.')
+ print()
+
+ now = Timestamp.now()
+ for sr in shard_ranges:
+ sr.deleted = 1
+ sr.timestamp = now
+ broker.merge_shard_ranges(shard_ranges)
+ print('Deleted %s existing shard ranges.' % len(shard_ranges))
+ return 0
+
+
+def _replace_shard_ranges(broker, args, shard_data, timeout=None):
+ own_shard_range = _check_own_shard_range(broker, args)
+ shard_ranges = make_shard_ranges(
+ broker, shard_data, args.shards_account_prefix)
+ _check_shard_ranges(own_shard_range, shard_ranges)
+
+ if args.verbose > 0:
+ print('New shard ranges to be injected:')
+ print(json.dumps([dict(sr) for sr in shard_ranges],
+ sort_keys=True, indent=2))
+
+ # Crank up the timeout in an effort to *make sure* this succeeds
+ with broker.updated_timeout(max(timeout or 0, args.replace_timeout)):
+ delete_shard_ranges(broker, args)
+ broker.merge_shard_ranges(shard_ranges)
+
+ print('Injected %d shard ranges.' % len(shard_ranges))
+ print('Run container-replicator to replicate them to other nodes.')
+ if args.enable:
+ return enable_sharding(broker, args)
+ else:
+ print('Use the enable sub-command to enable sharding.')
+ return 0
+
+
+def replace_shard_ranges(broker, args):
+ shard_data = _load_and_validate_shard_data(args)
+ return _replace_shard_ranges(broker, args, shard_data)
+
+
+def find_replace_shard_ranges(broker, args):
+ shard_data, delta_t = _find_ranges(broker, args, sys.stdout)
+ # Since we're trying to one-shot this, and the previous step probably
+ # took a while, make the timeout for writing *at least* that long
+ return _replace_shard_ranges(broker, args, shard_data, timeout=delta_t)
+
+
+def _enable_sharding(broker, own_shard_range, args):
+ if own_shard_range.update_state(ShardRange.SHARDING):
+ own_shard_range.epoch = Timestamp.now()
+ own_shard_range.state_timestamp = own_shard_range.epoch
+
+ with broker.updated_timeout(args.enable_timeout):
+ broker.merge_shard_ranges([own_shard_range])
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ ('True', Timestamp.now().normal)})
+ return own_shard_range
+
+
+def enable_sharding(broker, args):
+ own_shard_range = _check_own_shard_range(broker, args)
+ _check_shard_ranges(own_shard_range, broker.get_shard_ranges())
+
+ if own_shard_range.state == ShardRange.ACTIVE:
+ own_shard_range = _enable_sharding(broker, own_shard_range, args)
+ print('Container moved to state %r with epoch %s.' %
+ (own_shard_range.state_text, own_shard_range.epoch.internal))
+ elif own_shard_range.state == ShardRange.SHARDING:
+ if own_shard_range.epoch:
+ print('Container already in state %r with epoch %s.' %
+ (own_shard_range.state_text, own_shard_range.epoch.internal))
+ print('No action required.')
+ else:
+ print('Container already in state %r but missing epoch.' %
+ own_shard_range.state_text)
+ own_shard_range = _enable_sharding(broker, own_shard_range, args)
+ print('Container in state %r given epoch %s.' %
+ (own_shard_range.state_text, own_shard_range.epoch.internal))
+ else:
+ print('WARNING: container in state %s (should be active or sharding).'
+ % own_shard_range.state_text)
+ print('Aborting.')
+ return 2
+
+ print('Run container-sharder on all nodes to shard the container.')
+ return 0
+
+
+def _add_find_args(parser):
+ parser.add_argument('rows_per_shard', nargs='?', type=int, default=500000)
+
+
+def _add_replace_args(parser):
+ parser.add_argument(
+ '--shards_account_prefix', metavar='shards_account_prefix', type=str,
+ required=False, help='Prefix for shards account', default='.shards_')
+ parser.add_argument(
+ '--replace-timeout', type=int, default=600,
+ help='Minimum DB timeout to use when replacing shard ranges.')
+ parser.add_argument(
+ '--force', '-f', action='store_true', default=False,
+ help='Delete existing shard ranges; no questions asked.')
+ parser.add_argument(
+ '--enable', action='store_true', default=False,
+ help='Enable sharding after adding shard ranges.')
+
+
+def _add_enable_args(parser):
+ parser.add_argument(
+ '--enable-timeout', type=int, default=300,
+ help='DB timeout to use when enabling sharding.')
+
+
+def _make_parser():
+ parser = argparse.ArgumentParser(description='Manage shard ranges')
+ parser.add_argument('container_db')
+ parser.add_argument('--verbose', '-v', action='count',
+ help='Increase output verbosity')
+ subparsers = parser.add_subparsers(
+ help='Sub-command help', title='Sub-commands')
+
+ # find
+ find_parser = subparsers.add_parser(
+ 'find', help='Find and display shard ranges')
+ _add_find_args(find_parser)
+ find_parser.set_defaults(func=find_ranges)
+
+ # delete
+ delete_parser = subparsers.add_parser(
+ 'delete', help='Delete all existing shard ranges from db')
+ delete_parser.add_argument(
+ '--force', '-f', action='store_true', default=False,
+ help='Delete existing shard ranges; no questions asked.')
+ delete_parser.set_defaults(func=delete_shard_ranges)
+
+ # show
+ show_parser = subparsers.add_parser(
+ 'show', help='Print shard range data')
+ show_parser.add_argument(
+ '--include_deleted', '-d', action='store_true', default=False,
+ help='Include deleted shard ranges in output.')
+ show_parser.add_argument(
+ '--brief', '-b', action='store_true', default=False,
+ help='Show only shard range bounds in output.')
+ show_parser.set_defaults(func=show_shard_ranges)
+
+ # info
+ info_parser = subparsers.add_parser(
+ 'info', help='Print container db info')
+ info_parser.set_defaults(func=db_info)
+
+ # replace
+ replace_parser = subparsers.add_parser(
+ 'replace',
+ help='Replace existing shard ranges. User will be prompted before '
+ 'deleting any existing shard ranges.')
+ replace_parser.add_argument('input', metavar='input_file',
+ type=str, help='Name of file')
+ _add_replace_args(replace_parser)
+ replace_parser.set_defaults(func=replace_shard_ranges)
+
+ # find_and_replace
+ find_replace_parser = subparsers.add_parser(
+ 'find_and_replace',
+ help='Find new shard ranges and replace existing shard ranges. '
+ 'User will be prompted before deleting any existing shard ranges.'
+ )
+ _add_find_args(find_replace_parser)
+ _add_replace_args(find_replace_parser)
+ _add_enable_args(find_replace_parser)
+ find_replace_parser.set_defaults(func=find_replace_shard_ranges)
+
+ # enable
+ enable_parser = subparsers.add_parser(
+ 'enable', help='Enable sharding and move db to sharding state.')
+ _add_enable_args(enable_parser)
+ enable_parser.set_defaults(func=enable_sharding)
+ _add_replace_args(enable_parser)
+ return parser
+
+
+def main(args=None):
+ parser = _make_parser()
+ args = parser.parse_args(args)
+ logger = get_logger({}, name='ContainerBroker', log_to_console=True)
+ broker = ContainerBroker(args.container_db, logger=logger,
+ skip_commits=True)
+ broker.get_info()
+ print('Loaded db broker for %s.' % broker.path, file=sys.stderr)
+ return args.func(broker, args)
+
+
+if __name__ == '__main__':
+ exit(main())
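A hedged usage sketch of the new CLI above: the module can be driven through its main() entry point. The container DB path below is purely illustrative; in a real deployment it would be a hashed path under a container server's devices directory.

    from swift.cli.manage_shard_ranges import main

    # hypothetical path to a container DB
    db = '/srv/node1/sda1/containers/123/abc/<hash>/<hash>.db'

    # dump candidate shard ranges of roughly 500000 rows each as JSON
    main([db, 'find', '500000'])

    # find ranges, replace any existing ones and enable sharding in one step
    main([db, 'find_and_replace', '500000', '--enable'])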
diff --git a/swift/cli/shard-info.py b/swift/cli/shard-info.py
new file mode 100644
index 0000000000..01223787f7
--- /dev/null
+++ b/swift/cli/shard-info.py
@@ -0,0 +1,195 @@
+# Copyright (c) 2017 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from collections import defaultdict
+
+from swift.common import utils
+from swift.common.db_replicator import roundrobin_datadirs
+from swift.common.ring import ring
+from swift.common.utils import Timestamp
+from swift.container.backend import ContainerBroker, DATADIR
+
+TAB = ' '
+
+
+def broker_key(broker):
+ broker.get_info()
+ return broker.path
+
+
+def container_type(broker):
+ return 'ROOT' if broker.is_root_container() else 'SHARD'
+
+
+def collect_brokers(conf_path, names2nodes):
+ conf = utils.readconf(conf_path, 'container-replicator')
+ root = conf.get('devices', '/srv/node')
+ swift_dir = conf.get('swift_dir', '/etc/swift')
+ c_ring = ring.Ring(swift_dir, ring_name='container')
+ dirs = []
+ brokers = defaultdict(dict)
+ for node in c_ring.devs:
+ if node is None:
+ continue
+ datadir = os.path.join(root, node['device'], DATADIR)
+ if os.path.isdir(datadir):
+ dirs.append((datadir, node['id'], lambda *args: True))
+ for part, object_file, node_id in roundrobin_datadirs(dirs):
+ broker = ContainerBroker(object_file)
+ for node in c_ring.get_part_nodes(int(part)):
+ if node['id'] == node_id:
+ node_index = str(node['index'])
+ break
+ else:
+ node_index = 'handoff'
+ names2nodes[broker_key(broker)][(node_id, node_index)] = broker
+ return brokers
+
+
+def print_broker_info(node, broker, indent_level=0):
+ indent = indent_level * TAB
+ info = broker.get_info()
+ raw_info = broker._get_info()
+ deleted_at = float(info['delete_timestamp'])
+ if deleted_at:
+ deleted_at = Timestamp(info['delete_timestamp']).isoformat
+ else:
+ deleted_at = ' - '
+ print('%s(%s) %s, objs: %s, bytes: %s, actual_objs: %s, put: %s, '
+ 'deleted: %s' %
+ (indent, node[1][0], broker.get_db_state(),
+ info['object_count'], info['bytes_used'], raw_info['object_count'],
+ Timestamp(info['put_timestamp']).isoformat, deleted_at))
+
+
+def print_db(node, broker, expect_type='ROOT', indent_level=0):
+ indent = indent_level * TAB
+ print('%s(%s) %s node id: %s, node index: %s' %
+ (indent, node[1][0], broker.db_file, node[0], node[1]))
+ actual_type = container_type(broker)
+ if actual_type != expect_type:
+ print('%s ERROR expected %s but found %s' %
+ (indent, expect_type, actual_type))
+
+
+def print_own_shard_range(node, sr, indent_level):
+ indent = indent_level * TAB
+ range = '%r - %r' % (sr.lower, sr.upper)
+ print('%s(%s) %23s, objs: %3s, bytes: %3s, timestamp: %s (%s), '
+ 'modified: %s (%s), %7s: %s (%s), deleted: %s epoch: %s' %
+ (indent, node[1][0], range, sr.object_count, sr.bytes_used,
+ sr.timestamp.isoformat, sr.timestamp.internal,
+ sr.meta_timestamp.isoformat, sr.meta_timestamp.internal,
+ sr.state_text, sr.state_timestamp.isoformat,
+ sr.state_timestamp.internal, sr.deleted,
+ sr.epoch.internal if sr.epoch else None))
+
+
+def print_own_shard_range_info(node, shard_ranges, indent_level=0):
+ shard_ranges.sort(key=lambda x: x.deleted)
+ for sr in shard_ranges:
+ print_own_shard_range(node, sr, indent_level)
+
+
+def print_shard_range(node, sr, indent_level):
+ indent = indent_level * TAB
+ range = '%r - %r' % (sr.lower, sr.upper)
+ print('%s(%s) %23s, objs: %3s, bytes: %3s, timestamp: %s (%s), '
+ 'modified: %s (%s), %7s: %s (%s), deleted: %s %s' %
+ (indent, node[1][0], range, sr.object_count, sr.bytes_used,
+ sr.timestamp.isoformat, sr.timestamp.internal,
+ sr.meta_timestamp.isoformat, sr.meta_timestamp.internal,
+ sr.state_text, sr.state_timestamp.isoformat,
+ sr.state_timestamp.internal, sr.deleted, sr.name))
+
+
+def print_shard_range_info(node, shard_ranges, indent_level=0):
+ shard_ranges.sort(key=lambda x: x.deleted)
+ for sr in shard_ranges:
+ print_shard_range(node, sr, indent_level)
+
+
+def print_sharding_info(node, broker, indent_level=0):
+ indent = indent_level * TAB
+ print('%s(%s) %s' % (indent, node[1][0], broker.get_sharding_sysmeta()))
+
+
+def print_container(name, name2nodes2brokers, expect_type='ROOT',
+ indent_level=0, used_names=None):
+ used_names = used_names or set()
+ indent = indent_level * TAB
+ node2broker = name2nodes2brokers[name]
+ ordered_by_index = sorted(node2broker.keys(), key=lambda x: x[1])
+ brokers = [(node, node2broker[node]) for node in ordered_by_index]
+
+ print('%sName: %s' % (indent, name))
+ if name in used_names:
+ print('%s (Details already listed)\n' % indent)
+ return
+
+ used_names.add(name)
+ print(indent + 'DB files:')
+ for node, broker in brokers:
+ print_db(node, broker, expect_type, indent_level=indent_level + 1)
+
+ print(indent + 'Info:')
+ for node, broker in brokers:
+ print_broker_info(node, broker, indent_level=indent_level + 1)
+
+ print(indent + 'Sharding info:')
+ for node, broker in brokers:
+ print_sharding_info(node, broker, indent_level=indent_level + 1)
+ print(indent + 'Own shard range:')
+ for node, broker in brokers:
+ shard_ranges = broker.get_shard_ranges(
+ include_deleted=True, include_own=True, exclude_others=True)
+ print_own_shard_range_info(node, shard_ranges,
+ indent_level=indent_level + 1)
+ print(indent + 'Shard ranges:')
+ shard_names = set()
+ for node, broker in brokers:
+ shard_ranges = broker.get_shard_ranges(include_deleted=True)
+ for sr in shard_ranges:
+ shard_names.add(sr.name)
+ print_shard_range_info(node, shard_ranges,
+ indent_level=indent_level + 1)
+ print(indent + 'Shards:')
+ for sr_name in shard_names:
+ print_container(sr_name, name2nodes2brokers, expect_type='SHARD',
+ indent_level=indent_level + 1, used_names=used_names)
+ print('\n')
+
+
+def run(conf_paths):
+ # container_name -> (node id, node index) -> broker
+ name2nodes2brokers = defaultdict(dict)
+ for conf_path in conf_paths:
+ collect_brokers(conf_path, name2nodes2brokers)
+
+ print('First column on each line is (node index)\n')
+ for name, node2broker in name2nodes2brokers.items():
+ expect_root = False
+ for node, broker in node2broker.items():
+ expect_root = broker.is_root_container() or expect_root
+ if expect_root:
+ print_container(name, name2nodes2brokers)
+
+
+if __name__ == '__main__':
+ conf_dir = '/etc/swift/container-server'
+ conf_paths = [os.path.join(conf_dir, p) for p in os.listdir(conf_dir)
+ if p.endswith(('conf', 'conf.d'))]
+ run(conf_paths)
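For orientation, a minimal sketch (all values invented) of the mapping that collect_brokers() builds and print_container() consumes: container path -> (node id, node index) -> broker, where the node index is the string form of the ring index, or 'handoff' when the db was found on a non-primary device.

    name2nodes2brokers = {
        'AUTH_test/sharded_container': {
            (0, '0'): broker_on_node_0,         # primary, ring index 0
            (1, '1'): broker_on_node_1,         # primary, ring index 1
            (7, 'handoff'): broker_on_handoff,  # db found on a handoff device
        },
    }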
diff --git a/swift/common/db.py b/swift/common/db.py
index b05eeb8d11..6425e85034 100644
--- a/swift/common/db.py
+++ b/swift/common/db.py
@@ -71,6 +71,18 @@ def native_str_keys(metadata):
metadata[k.decode('utf-8')] = sv
+ZERO_LIKE_VALUES = {None, '', 0, '0'}
+
+
+def zero_like(count):
+ """
+ We've cargo culted our consumers to be tolerant of various expressions of
+ zero in our databases for backwards compatibility with less disciplined
+ producers.
+ """
+ return count in ZERO_LIKE_VALUES
+
+
def _db_timeout(timeout, db_file, call):
with LockTimeout(timeout, db_file):
retry_wait = 0.001
@@ -208,11 +220,27 @@ class DatabaseBroker(object):
def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None,
account=None, container=None, pending_timeout=None,
- stale_reads_ok=False):
- """Encapsulates working with a database."""
+ stale_reads_ok=False, skip_commits=False):
+ """Encapsulates working with a database.
+
+ :param db_file: path to a database file.
+ :param timeout: timeout used for database operations.
+ :param logger: a logger instance.
+ :param account: name of account.
+ :param container: name of container.
+ :param pending_timeout: timeout used when attempting to take a lock to
+ write to pending file.
+ :param stale_reads_ok: if True then no error is raised if pending
+ commits cannot be committed before the database is read, otherwise
+ an error is raised.
+ :param skip_commits: if True then this broker instance will never
+ commit records from the pending file to the database;
+ :meth:`~swift.common.db.DatabaseBroker.put_record` should not be
+ called on brokers with skip_commits True.
+ """
self.conn = None
- self.db_file = db_file
- self.pending_file = self.db_file + '.pending'
+ self._db_file = db_file
+ self.pending_file = self._db_file + '.pending'
self.pending_timeout = pending_timeout or 10
self.stale_reads_ok = stale_reads_ok
self.db_dir = os.path.dirname(db_file)
@@ -221,6 +249,7 @@ class DatabaseBroker(object):
self.account = account
self.container = container
self._db_version = -1
+ self.skip_commits = skip_commits
def __str__(self):
"""
@@ -240,9 +269,9 @@ class DatabaseBroker(object):
:param put_timestamp: internalized timestamp of initial PUT request
:param storage_policy_index: only required for containers
"""
- if self.db_file == ':memory:':
+ if self._db_file == ':memory:':
tmp_db_file = None
- conn = get_db_connection(self.db_file, self.timeout)
+ conn = get_db_connection(self._db_file, self.timeout)
else:
mkdirs(self.db_dir)
fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir)
@@ -329,15 +358,22 @@ class DatabaseBroker(object):
self._delete_db(conn, timestamp)
conn.commit()
+ @property
+ def db_file(self):
+ return self._db_file
+
+ def get_device_path(self):
+ suffix_path = os.path.dirname(self.db_dir)
+ partition_path = os.path.dirname(suffix_path)
+ dbs_path = os.path.dirname(partition_path)
+ return os.path.dirname(dbs_path)
+
def quarantine(self, reason):
"""
The database will be quarantined and a
sqlite3.DatabaseError will be raised indicating the action taken.
"""
- prefix_path = os.path.dirname(self.db_dir)
- partition_path = os.path.dirname(prefix_path)
- dbs_path = os.path.dirname(partition_path)
- device_path = os.path.dirname(dbs_path)
+ device_path = self.get_device_path()
quar_path = os.path.join(device_path, 'quarantined',
self.db_type + 's',
os.path.basename(self.db_dir))
@@ -377,6 +413,20 @@ class DatabaseBroker(object):
self.quarantine(exc_hint)
+ @contextmanager
+ def updated_timeout(self, new_timeout):
+ """Use with "with" statement; updates ``timeout`` within the block."""
+ old_timeout = self.timeout
+ try:
+ self.timeout = new_timeout
+ if self.conn:
+ self.conn.timeout = new_timeout
+ yield old_timeout
+ finally:
+ self.timeout = old_timeout
+ if self.conn:
+ self.conn.timeout = old_timeout
+
@contextmanager
def get(self):
"""Use with the "with" statement; returns a database connection."""
@@ -477,6 +527,23 @@ class DatabaseBroker(object):
with self.get() as conn:
return self._is_deleted(conn)
+ def empty(self):
+ """
+ Check if the broker abstraction contains any undeleted records.
+ """
+ raise NotImplementedError()
+
+ def is_reclaimable(self, now, reclaim_age):
+ """
+ Check if the broker abstraction is empty, and has been marked deleted
+ for at least a reclaim age.
+ """
+ info = self.get_replication_info()
+ return (zero_like(info['count']) and
+ (Timestamp(now - reclaim_age) >
+ Timestamp(info['delete_timestamp']) >
+ Timestamp(info['put_timestamp'])))
+
def merge_timestamps(self, created_at, put_timestamp, delete_timestamp):
"""
Used in replication to handle updating timestamps.
@@ -548,13 +615,15 @@ class DatabaseBroker(object):
result.append({'remote_id': row[0], 'sync_point': row[1]})
return result
- def get_max_row(self):
+ def get_max_row(self, table=None):
+ if not table:
+ table = self.db_contains_type
query = '''
SELECT SQLITE_SEQUENCE.seq
FROM SQLITE_SEQUENCE
WHERE SQLITE_SEQUENCE.name == '%s'
LIMIT 1
- ''' % (self.db_contains_type)
+ ''' % (table, )
with self.get() as conn:
row = conn.execute(query).fetchone()
return row[0] if row else -1
@@ -582,11 +651,26 @@ class DatabaseBroker(object):
return curs.fetchone()
def put_record(self, record):
- if self.db_file == ':memory:':
+ """
+ Put a record into the DB. If the DB has an associated pending file with
+ space then the record is appended to that file and a commit to the DB
+ is deferred. If the DB is in-memory or its pending file is full then
+ the record will be committed immediately.
+
+ :param record: a record to be added to the DB.
+ :raises DatabaseConnectionError: if the DB file does not exist or if
+ ``skip_commits`` is True.
+ :raises LockTimeout: if a timeout occurs while waiting to take a lock
+ to write to the pending file.
+ """
+ if self._db_file == ':memory:':
self.merge_items([record])
return
if not os.path.exists(self.db_file):
raise DatabaseConnectionError(self.db_file, "DB doesn't exist")
+ if self.skip_commits:
+ raise DatabaseConnectionError(self.db_file,
+ 'commits not accepted')
with lock_parent_directory(self.pending_file, self.pending_timeout):
pending_size = 0
try:
@@ -606,6 +690,10 @@ class DatabaseBroker(object):
protocol=PICKLE_PROTOCOL).encode('base64'))
fp.flush()
+ def _skip_commit_puts(self):
+ return (self._db_file == ':memory:' or self.skip_commits or not
+ os.path.exists(self.pending_file))
+
def _commit_puts(self, item_list=None):
"""
Scan for .pending files and commit the found records by feeding them
@@ -614,7 +702,13 @@ class DatabaseBroker(object):
:param item_list: A list of items to commit in addition to .pending
"""
- if self.db_file == ':memory:' or not os.path.exists(self.pending_file):
+ if self._skip_commit_puts():
+ if item_list:
+ # this broker instance should not be used to commit records,
+ # but if it is then raise an error rather than quietly
+ # discarding the records in item_list.
+ raise DatabaseConnectionError(self.db_file,
+ 'commits not accepted')
return
if item_list is None:
item_list = []
@@ -645,7 +739,7 @@ class DatabaseBroker(object):
Catch failures of _commit_puts() if broker is intended for
reading of stats, and thus does not care for pending updates.
"""
- if self.db_file == ':memory:' or not os.path.exists(self.pending_file):
+ if self._skip_commit_puts():
return
try:
with lock_parent_directory(self.pending_file,
@@ -663,6 +757,12 @@ class DatabaseBroker(object):
"""
raise NotImplementedError
+ def merge_items(self, item_list, source=None):
+ """
+ Save ``item_list`` to the database.
+ """
+ raise NotImplementedError
+
def make_tuple_for_pickle(self, record):
"""
Turn this db record dict into the format this service uses for
@@ -701,7 +801,7 @@ class DatabaseBroker(object):
within 512k of a boundary, it allocates to the next boundary.
Boundaries are 2m, 5m, 10m, 25m, 50m, then every 50m after.
"""
- if not DB_PREALLOCATION or self.db_file == ':memory:':
+ if not DB_PREALLOCATION or self._db_file == ':memory:':
return
MB = (1024 * 1024)
@@ -830,40 +930,46 @@ class DatabaseBroker(object):
def reclaim(self, age_timestamp, sync_timestamp):
"""
- Delete rows from the db_contains_type table that are marked deleted
- and whose created_at timestamp is < age_timestamp. Also deletes rows
- from incoming_sync and outgoing_sync where the updated_at timestamp is
- < sync_timestamp.
+ Delete reclaimable rows and metadata from the db.
- In addition, this calls the DatabaseBroker's :func:`_reclaim` method.
+ By default this method will delete rows from the db_contains_type table
+ that are marked deleted and whose created_at timestamp is <
+ age_timestamp, and deletes rows from incoming_sync and outgoing_sync
+ where the updated_at timestamp is < sync_timestamp. In addition, this
+ calls the :meth:`_reclaim_metadata` method.
+
+ Subclasses may reclaim other items by overriding :meth:`_reclaim`.
:param age_timestamp: max created_at timestamp of object rows to delete
:param sync_timestamp: max update_at timestamp of sync rows to delete
"""
- if self.db_file != ':memory:' and os.path.exists(self.pending_file):
+ if not self._skip_commit_puts():
with lock_parent_directory(self.pending_file,
self.pending_timeout):
self._commit_puts()
with self.get() as conn:
- conn.execute('''
- DELETE FROM %s WHERE deleted = 1 AND %s < ?
- ''' % (self.db_contains_type, self.db_reclaim_timestamp),
- (age_timestamp,))
- try:
- conn.execute('''
- DELETE FROM outgoing_sync WHERE updated_at < ?
- ''', (sync_timestamp,))
- conn.execute('''
- DELETE FROM incoming_sync WHERE updated_at < ?
- ''', (sync_timestamp,))
- except sqlite3.OperationalError as err:
- # Old dbs didn't have updated_at in the _sync tables.
- if 'no such column: updated_at' not in str(err):
- raise
- DatabaseBroker._reclaim(self, conn, age_timestamp)
+ self._reclaim(conn, age_timestamp, sync_timestamp)
+ self._reclaim_metadata(conn, age_timestamp)
conn.commit()
- def _reclaim(self, conn, timestamp):
+ def _reclaim(self, conn, age_timestamp, sync_timestamp):
+ conn.execute('''
+ DELETE FROM %s WHERE deleted = 1 AND %s < ?
+ ''' % (self.db_contains_type, self.db_reclaim_timestamp),
+ (age_timestamp,))
+ try:
+ conn.execute('''
+ DELETE FROM outgoing_sync WHERE updated_at < ?
+ ''', (sync_timestamp,))
+ conn.execute('''
+ DELETE FROM incoming_sync WHERE updated_at < ?
+ ''', (sync_timestamp,))
+ except sqlite3.OperationalError as err:
+ # Old dbs didn't have updated_at in the _sync tables.
+ if 'no such column: updated_at' not in str(err):
+ raise
+
+ def _reclaim_metadata(self, conn, timestamp):
"""
Removes any empty metadata values older than the timestamp using the
given database connection. This function will not call commit on the
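A minimal sketch of the two small helpers added to DatabaseBroker above; ``broker`` and ``shard_ranges`` are assumed to already exist (for example a ContainerBroker and a list of ShardRange instances):

    from swift.common.db import zero_like

    # zero_like() treats the legacy representations of "no rows" the same way
    zero_like(None), zero_like(''), zero_like('0'), zero_like(0)  # all True
    zero_like(5)                                                  # False

    # updated_timeout() temporarily raises the DB lock timeout for slow bulk
    # work and restores the previous value when the block exits
    with broker.updated_timeout(600):
        broker.merge_shard_ranges(shard_ranges)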
diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py
index c464341b21..0d063cd455 100644
--- a/swift/common/db_replicator.py
+++ b/swift/common/db_replicator.py
@@ -33,10 +33,12 @@ from swift.common.direct_client import quote
from swift.common.utils import get_logger, whataremyips, storage_directory, \
renamer, mkdirs, lock_parent_directory, config_true_value, \
unlink_older_than, dump_recon_cache, rsync_module_interpolation, \
- json, Timestamp, parse_override_options, round_robin_iter, Everything
+ json, parse_override_options, round_robin_iter, Everything, get_db_files, \
+ parse_db_filename
from swift.common import ring
from swift.common.ring.utils import is_local_device
-from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE
+from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE, \
+ is_success
from swift.common.bufferedhttp import BufferedHTTPConnection
from swift.common.exceptions import DriveNotMounted
from swift.common.daemon import Daemon
@@ -87,11 +89,14 @@ def roundrobin_datadirs(datadirs):
found (in their proper places). The partitions within each data
dir are walked randomly, however.
- :param datadirs: a list of (path, node_id, partition_filter) to walk
- :returns: A generator of (partition, path_to_db_file, node_id)
+ :param datadirs: a list of tuples of (path, context, partition_filter) to
+ walk. The context may be any object; the context is not
+ used by this function but is included with each yielded
+ tuple.
+ :returns: A generator of (partition, path_to_db_file, context)
"""
- def walk_datadir(datadir, node_id, part_filter):
+ def walk_datadir(datadir, context, part_filter):
partitions = [pd for pd in os.listdir(datadir)
if looks_like_partition(pd) and part_filter(pd)]
random.shuffle(partitions)
@@ -116,17 +121,23 @@ def roundrobin_datadirs(datadirs):
if not os.path.isdir(hash_dir):
continue
object_file = os.path.join(hash_dir, hsh + '.db')
+ # common case
if os.path.exists(object_file):
- yield (partition, object_file, node_id)
- else:
- try:
- os.rmdir(hash_dir)
- except OSError as e:
- if e.errno != errno.ENOTEMPTY:
- raise
+ yield (partition, object_file, context)
+ continue
+ # look for any alternate db filenames
+ db_files = get_db_files(object_file)
+ if db_files:
+ yield (partition, db_files[-1], context)
+ continue
+ try:
+ os.rmdir(hash_dir)
+ except OSError as e:
+ if e.errno != errno.ENOTEMPTY:
+ raise
- its = [walk_datadir(datadir, node_id, filt)
- for datadir, node_id, filt in datadirs]
+ its = [walk_datadir(datadir, context, filt)
+ for datadir, context, filt in datadirs]
rr_its = round_robin_iter(its)
for datadir in rr_its:
@@ -212,7 +223,7 @@ class Replicator(Daemon):
self.stats = {'attempted': 0, 'success': 0, 'failure': 0, 'ts_repl': 0,
'no_change': 0, 'hashmatch': 0, 'rsync': 0, 'diff': 0,
'remove': 0, 'empty': 0, 'remote_merge': 0,
- 'start': time.time(), 'diff_capped': 0,
+ 'start': time.time(), 'diff_capped': 0, 'deferred': 0,
'failure_nodes': {}}
def _report_stats(self):
@@ -309,9 +320,20 @@ class Replicator(Daemon):
different_region=different_region):
return False
with Timeout(replicate_timeout or self.node_timeout):
- response = http.replicate(replicate_method, local_id)
+ response = http.replicate(replicate_method, local_id,
+ os.path.basename(broker.db_file))
return response and 200 <= response.status < 300
+ def _send_replicate_request(self, http, *repl_args):
+ with Timeout(self.node_timeout):
+ response = http.replicate(*repl_args)
+ if not response or not is_success(response.status):
+ if response:
+ self.logger.error('ERROR Bad response %s from %s',
+ response.status, http.host)
+ return False
+ return True
+
def _usync_db(self, point, broker, http, remote_id, local_id):
"""
Sync a db by sending all records since the last sync.
@@ -326,26 +348,29 @@ class Replicator(Daemon):
"""
self.stats['diff'] += 1
self.logger.increment('diffs')
- self.logger.debug('Syncing chunks with %s, starting at %s',
- http.host, point)
+ self.logger.debug('%s usyncing chunks to %s, starting at row %s',
+ broker.db_file,
+ '%(ip)s:%(port)s/%(device)s' % http.node,
+ point)
+ start = time.time()
sync_table = broker.get_syncs()
objects = broker.get_items_since(point, self.per_diff)
diffs = 0
while len(objects) and diffs < self.max_diffs:
diffs += 1
- with Timeout(self.node_timeout):
- response = http.replicate('merge_items', objects, local_id)
- if not response or response.status >= 300 or response.status < 200:
- if response:
- self.logger.error(_('ERROR Bad response %(status)s from '
- '%(host)s'),
- {'status': response.status,
- 'host': http.host})
+ if not self._send_replicate_request(
+ http, 'merge_items', objects, local_id):
return False
# replication relies on db order to send the next merge batch in
# order with no gaps
point = objects[-1]['ROWID']
objects = broker.get_items_since(point, self.per_diff)
+
+ self.logger.debug('%s usyncing chunks to %s, finished at row %s (%gs)',
+ broker.db_file,
+ '%(ip)s:%(port)s/%(device)s' % http.node,
+ point, time.time() - start)
+
if objects:
self.logger.debug(
'Synchronization for %s has fallen more than '
@@ -397,9 +422,8 @@ class Replicator(Daemon):
:returns: ReplConnection object
"""
- return ReplConnection(node, partition,
- os.path.basename(db_file).split('.', 1)[0],
- self.logger)
+ hsh, other, ext = parse_db_filename(db_file)
+ return ReplConnection(node, partition, hsh, self.logger)
def _gather_sync_args(self, info):
"""
@@ -449,32 +473,79 @@ class Replicator(Daemon):
if rinfo.get('metadata', ''):
broker.update_metadata(json.loads(rinfo['metadata']))
if self._in_sync(rinfo, info, broker, local_sync):
+ self.logger.debug('%s in sync with %s, nothing to do',
+ broker.db_file,
+ '%(ip)s:%(port)s/%(device)s' % node)
return True
- # if the difference in rowids between the two differs by
- # more than 50% and the difference is greater than per_diff,
- # rsync then do a remote merge.
- # NOTE: difference > per_diff stops us from dropping to rsync
- # on smaller containers, who have only a few rows to sync.
- if rinfo['max_row'] / float(info['max_row']) < 0.5 and \
- info['max_row'] - rinfo['max_row'] > self.per_diff:
- self.stats['remote_merge'] += 1
- self.logger.increment('remote_merges')
- return self._rsync_db(broker, node, http, info['id'],
- replicate_method='rsync_then_merge',
- replicate_timeout=(info['count'] / 2000),
- different_region=different_region)
- # else send diffs over to the remote server
- return self._usync_db(max(rinfo['point'], local_sync),
- broker, http, rinfo['id'], info['id'])
+ return self._choose_replication_mode(
+ node, rinfo, info, local_sync, broker, http,
+ different_region)
+ return False
+
+ def _choose_replication_mode(self, node, rinfo, info, local_sync, broker,
+ http, different_region):
+ # if the difference in rowids between the two differs by
+ # more than 50% and the difference is greater than per_diff,
+ # rsync then do a remote merge.
+ # NOTE: difference > per_diff stops us from dropping to rsync
+ # on smaller containers, who have only a few rows to sync.
+ if (rinfo['max_row'] / float(info['max_row']) < 0.5 and
+ info['max_row'] - rinfo['max_row'] > self.per_diff):
+ self.stats['remote_merge'] += 1
+ self.logger.increment('remote_merges')
+ return self._rsync_db(broker, node, http, info['id'],
+ replicate_method='rsync_then_merge',
+ replicate_timeout=(info['count'] / 2000),
+ different_region=different_region)
+ # else send diffs over to the remote server
+ return self._usync_db(max(rinfo['point'], local_sync),
+ broker, http, rinfo['id'], info['id'])
def _post_replicate_hook(self, broker, info, responses):
"""
- :param broker: the container that just replicated
+ :param broker: broker instance for the database that just replicated
:param info: pre-replication full info dict
:param responses: a list of bools indicating success from nodes
"""
pass
+ def cleanup_post_replicate(self, broker, orig_info, responses):
+ """
+ Cleanup non primary database from disk if needed.
+
+ :param broker: the broker for the database we're replicating
+ :param orig_info: snapshot of the broker replication info dict taken
+ before replication
+ :param responses: a list of boolean success values for each replication
+ request to other nodes
+
+ :returns: False if deletion of the database was attempted but
+ unsuccessful, otherwise True.
+ """
+ log_template = 'Not deleting db %s (%%s)' % broker.db_file
+ max_row_delta = broker.get_max_row() - orig_info['max_row']
+ if max_row_delta < 0:
+ reason = 'negative max_row_delta: %s' % max_row_delta
+ self.logger.error(log_template, reason)
+ return True
+ if max_row_delta:
+ reason = '%s new rows' % max_row_delta
+ self.logger.debug(log_template, reason)
+ return True
+ if not (responses and all(responses)):
+ reason = '%s/%s success' % (responses.count(True), len(responses))
+ self.logger.debug(log_template, reason)
+ return True
+ # If the db has been successfully synced to all of its peers, it can be
+ # removed. Callers should have already checked that the db is not on a
+ # primary node.
+ if not self.delete_db(broker):
+ self.logger.debug(
+ 'Failed to delete db %s', broker.db_file)
+ return False
+ self.logger.debug('Successfully deleted db %s', broker.db_file)
+ return True
+
def _replicate_object(self, partition, object_file, node_id):
"""
Replicate the db, choosing method based on whether or not it
@@ -483,12 +554,20 @@ class Replicator(Daemon):
:param partition: partition to be replicated to
:param object_file: DB file name to be replicated
:param node_id: node id of the node to be replicated to
+ :returns: a tuple (success, responses). ``success`` is a boolean that
+ is True if the method completed successfully, False otherwise.
+ ``responses`` is a list of booleans each of which indicates the
+ success or not of replicating to a peer node if replication has
+ been attempted. ``success`` is False if any of ``responses`` is
+ False; when ``responses`` is empty, ``success`` may be either True
+ or False.
"""
start_time = now = time.time()
self.logger.debug('Replicating db %s', object_file)
self.stats['attempted'] += 1
self.logger.increment('attempts')
shouldbehere = True
+ responses = []
try:
broker = self.brokerclass(object_file, pending_timeout=30)
broker.reclaim(now - self.reclaim_age,
@@ -518,18 +597,12 @@ class Replicator(Daemon):
failure_dev['device'])
for failure_dev in nodes])
self.logger.increment('failures')
- return
- # The db is considered deleted if the delete_timestamp value is greater
- # than the put_timestamp, and there are no objects.
- delete_timestamp = Timestamp(info.get('delete_timestamp') or 0)
- put_timestamp = Timestamp(info.get('put_timestamp') or 0)
- if (now - self.reclaim_age) > delete_timestamp > put_timestamp and \
- info['count'] in (None, '', 0, '0'):
+ return False, responses
+ if broker.is_reclaimable(now, self.reclaim_age):
if self.report_up_to_date(info):
self.delete_db(broker)
self.logger.timing_since('timing', start_time)
- return
- responses = []
+ return True, responses
failure_devs_info = set()
nodes = self.ring.get_part_nodes(int(partition))
local_dev = None
@@ -587,14 +660,11 @@ class Replicator(Daemon):
except (Exception, Timeout):
self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
'hook for %s', broker.db_file)
- if not shouldbehere and responses and all(responses):
- # If the db shouldn't be on this node and has been successfully
- # synced to all of its peers, it can be removed.
- if not self.delete_db(broker):
+ if not shouldbehere:
+ if not self.cleanup_post_replicate(broker, info, responses):
failure_devs_info.update(
[(failure_dev['replication_ip'], failure_dev['device'])
for failure_dev in repl_nodes])
-
target_devs_info = set([(target_dev['replication_ip'],
target_dev['device'])
for target_dev in repl_nodes])
@@ -602,6 +672,9 @@ class Replicator(Daemon):
self._add_failure_stats(failure_devs_info)
self.logger.timing_since('timing', start_time)
+ if shouldbehere:
+ responses.append(True)
+ return all(responses), responses
def delete_db(self, broker):
object_file = broker.db_file
@@ -746,6 +819,9 @@ class ReplicatorRpc(object):
self.mount_check = mount_check
self.logger = logger or get_logger({}, log_route='replicator-rpc')
+ def _db_file_exists(self, db_path):
+ return os.path.exists(db_path)
+
def dispatch(self, replicate_args, args):
if not hasattr(args, 'pop'):
return HTTPBadRequest(body='Invalid object type')
@@ -764,7 +840,7 @@ class ReplicatorRpc(object):
# someone might be about to rsync a db to us,
# make sure there's a tmp dir to receive it.
mkdirs(os.path.join(self.root, drive, 'tmp'))
- if not os.path.exists(db_file):
+ if not self._db_file_exists(db_file):
return HTTPNotFound()
return getattr(self, op)(self.broker_class(db_file), args)
@@ -863,6 +939,8 @@ class ReplicatorRpc(object):
def complete_rsync(self, drive, db_file, args):
old_filename = os.path.join(self.root, drive, 'tmp', args[0])
+ if args[1:]:
+ db_file = os.path.join(os.path.dirname(db_file), args[1])
if os.path.exists(db_file):
return HTTPNotFound()
if not os.path.exists(old_filename):
@@ -872,12 +950,21 @@ class ReplicatorRpc(object):
renamer(old_filename, db_file)
return HTTPNoContent()
+ def _abort_rsync_then_merge(self, db_file, tmp_filename):
+ return not (self._db_file_exists(db_file) and
+ os.path.exists(tmp_filename))
+
+ def _post_rsync_then_merge_hook(self, existing_broker, new_broker):
+ # subclasses may override to make custom changes to the new broker
+ pass
+
def rsync_then_merge(self, drive, db_file, args):
- old_filename = os.path.join(self.root, drive, 'tmp', args[0])
- if not os.path.exists(db_file) or not os.path.exists(old_filename):
+ tmp_filename = os.path.join(self.root, drive, 'tmp', args[0])
+ if self._abort_rsync_then_merge(db_file, tmp_filename):
return HTTPNotFound()
- new_broker = self.broker_class(old_filename)
+ new_broker = self.broker_class(tmp_filename)
existing_broker = self.broker_class(db_file)
+ db_file = existing_broker.db_file
point = -1
objects = existing_broker.get_items_since(point, 1000)
while len(objects):
@@ -885,9 +972,13 @@ class ReplicatorRpc(object):
point = objects[-1]['ROWID']
objects = existing_broker.get_items_since(point, 1000)
sleep()
+ new_broker.merge_syncs(existing_broker.get_syncs())
+ self._post_rsync_then_merge_hook(existing_broker, new_broker)
new_broker.newid(args[0])
new_broker.update_metadata(existing_broker.metadata)
- renamer(old_filename, db_file)
+ if self._abort_rsync_then_merge(db_file, tmp_filename):
+ return HTTPNotFound()
+ renamer(tmp_filename, db_file)
return HTTPNoContent()
# Footnote [1]:
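A minimal sketch of the generalised roundrobin_datadirs() contract: the second element of each datadir tuple is now an opaque context (the replicator passes a node id) that is handed back unchanged with every yielded db file. The paths and ids here are illustrative.

    from swift.common.db_replicator import roundrobin_datadirs

    datadirs = [
        ('/srv/node1/sda1/containers', 0, lambda partition: True),
        ('/srv/node2/sdb1/containers', 1, lambda partition: True),
    ]
    for partition, db_file, context in roundrobin_datadirs(datadirs):
        print(partition, db_file, context)  # context is 0 or 1 here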
diff --git a/swift/common/direct_client.py b/swift/common/direct_client.py
index fad4440f64..9f112afa95 100644
--- a/swift/common/direct_client.py
+++ b/swift/common/direct_client.py
@@ -54,22 +54,72 @@ class DirectClientException(ClientException):
http_reason=resp.reason, http_headers=headers)
-def _make_req(node, part, method, path, _headers, stype,
- conn_timeout=5, response_timeout=15):
+def _make_req(node, part, method, path, headers, stype,
+ conn_timeout=5, response_timeout=15, send_timeout=15,
+ contents=None, content_length=None, chunk_size=65535):
"""
Make request to backend storage node.
(i.e. 'Account', 'Container', 'Object')
:param node: a node dict from a ring
- :param part: an integer, the partion number
+ :param part: an integer, the partition number
:param method: a string, the HTTP method (e.g. 'PUT', 'DELETE', etc)
:param path: a string, the request path
:param headers: a dict, header name => value
:param stype: a string, describing the type of service
+ :param conn_timeout: timeout while waiting for connection; default is 5
+ seconds
+ :param response_timeout: timeout while waiting for response; default is 15
+ seconds
+ :param send_timeout: timeout for sending request body; default is 15
+ seconds
+ :param contents: an iterable or string to read the request body from
+ :param content_length: value to send as content-length header
+ :param chunk_size: if defined, chunk size of data to send
:returns: an HTTPResponse object
+ :raises DirectClientException: if the response status is not 2xx
+ :raises eventlet.Timeout: if either conn_timeout or response_timeout is
+ exceeded
"""
+ if contents is not None:
+ if content_length is not None:
+ headers['Content-Length'] = str(content_length)
+ else:
+ for n, v in headers.items():
+ if n.lower() == 'content-length':
+ content_length = int(v)
+ if not contents:
+ headers['Content-Length'] = '0'
+ if isinstance(contents, six.string_types):
+ contents = [contents]
+ if content_length is None:
+ headers['Transfer-Encoding'] = 'chunked'
+
with Timeout(conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'], part,
- method, path, headers=_headers)
+ method, path, headers=headers)
+
+ if contents is not None:
+ contents_f = FileLikeIter(contents)
+
+ with Timeout(send_timeout):
+ if content_length is None:
+ chunk = contents_f.read(chunk_size)
+ while chunk:
+ conn.send('%x\r\n%s\r\n' % (len(chunk), chunk))
+ chunk = contents_f.read(chunk_size)
+ conn.send('0\r\n\r\n')
+ else:
+ left = content_length
+ while left > 0:
+ size = chunk_size
+ if size > left:
+ size = left
+ chunk = contents_f.read(size)
+ if not chunk:
+ break
+ conn.send(chunk)
+ left -= len(chunk)
+
with Timeout(response_timeout):
resp = conn.getresponse()
resp.read()
@@ -82,7 +132,7 @@ def _get_direct_account_container(path, stype, node, part,
marker=None, limit=None,
prefix=None, delimiter=None,
conn_timeout=5, response_timeout=15,
- end_marker=None, reverse=None):
+ end_marker=None, reverse=None, headers=None):
"""Base class for get direct account and container.
Do not use directly use the get_direct_account or
@@ -105,7 +155,7 @@ def _get_direct_account_container(path, stype, node, part,
with Timeout(conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'], part,
'GET', path, query_string=qs,
- headers=gen_headers())
+ headers=gen_headers(hdrs_in=headers))
with Timeout(response_timeout):
resp = conn.getresponse()
if not is_success(resp.status):
@@ -121,11 +171,12 @@ def _get_direct_account_container(path, stype, node, part,
return resp_headers, json.loads(resp.read())
-def gen_headers(hdrs_in=None, add_ts=False):
+def gen_headers(hdrs_in=None, add_ts=False, add_user_agent=True):
hdrs_out = HeaderKeyDict(hdrs_in) if hdrs_in else HeaderKeyDict()
if add_ts:
hdrs_out['X-Timestamp'] = Timestamp.now().internal
- hdrs_out['User-Agent'] = 'direct-client %s' % os.getpid()
+ if add_user_agent:
+ hdrs_out['User-Agent'] = 'direct-client %s' % os.getpid()
return hdrs_out
@@ -197,7 +248,7 @@ def direct_head_container(node, part, account, container, conn_timeout=5,
def direct_get_container(node, part, account, container, marker=None,
limit=None, prefix=None, delimiter=None,
conn_timeout=5, response_timeout=15, end_marker=None,
- reverse=None):
+ reverse=None, headers=None):
"""
Get container listings directly from the container server.
@@ -213,6 +264,7 @@ def direct_get_container(node, part, account, container, marker=None,
:param response_timeout: timeout in seconds for getting the response
:param end_marker: end_marker query
:param reverse: reverse the returned listing
+ :param headers: headers to be included in the request
:returns: a tuple of (response headers, a list of objects) The response
headers will be a HeaderKeyDict.
"""
@@ -224,7 +276,8 @@ def direct_get_container(node, part, account, container, marker=None,
end_marker=end_marker,
reverse=reverse,
conn_timeout=conn_timeout,
- response_timeout=response_timeout)
+ response_timeout=response_timeout,
+ headers=headers)
def direct_delete_container(node, part, account, container, conn_timeout=5,
@@ -250,6 +303,37 @@ def direct_delete_container(node, part, account, container, conn_timeout=5,
'Container', conn_timeout, response_timeout)
+def direct_put_container(node, part, account, container, conn_timeout=5,
+ response_timeout=15, headers=None, contents=None,
+ content_length=None, chunk_size=65535):
+ """
+ Make a PUT request to a container server.
+
+ :param node: node dictionary from the ring
+ :param part: partition the container is on
+ :param account: account name
+ :param container: container name
+ :param conn_timeout: timeout in seconds for establishing the connection
+ :param response_timeout: timeout in seconds for getting the response
+ :param headers: additional headers to include in the request
+ :param contents: an iterable or string to send in request body (optional)
+ :param content_length: value to send as content-length header (optional)
+ :param chunk_size: chunk size of data to send (optional)
+ :raises ClientException: HTTP PUT request failed
+ """
+ if headers is None:
+ headers = {}
+
+ lower_headers = set(k.lower() for k in headers)
+ headers_out = gen_headers(headers,
+ add_ts='x-timestamp' not in lower_headers,
+ add_user_agent='user-agent' not in lower_headers)
+ path = '/%s/%s' % (account, container)
+ _make_req(node, part, 'PUT', path, headers_out, 'Container', conn_timeout,
+ response_timeout, contents=contents,
+ content_length=content_length, chunk_size=chunk_size)
+
+
def direct_put_container_object(node, part, account, container, obj,
conn_timeout=5, response_timeout=15,
headers=None):
@@ -385,56 +469,18 @@ def direct_put_object(node, part, account, container, name, contents,
headers = {}
if etag:
headers['ETag'] = etag.strip('"')
- if content_length is not None:
- headers['Content-Length'] = str(content_length)
- else:
- for n, v in headers.items():
- if n.lower() == 'content-length':
- content_length = int(v)
if content_type is not None:
headers['Content-Type'] = content_type
else:
headers['Content-Type'] = 'application/octet-stream'
- if not contents:
- headers['Content-Length'] = '0'
- if isinstance(contents, six.string_types):
- contents = [contents]
# In case the caller wants to insert an object with a specific age
add_ts = 'X-Timestamp' not in headers
- if content_length is None:
- headers['Transfer-Encoding'] = 'chunked'
+ resp = _make_req(
+ node, part, 'PUT', path, gen_headers(headers, add_ts=add_ts),
+ 'Object', conn_timeout, response_timeout, contents=contents,
+ content_length=content_length, chunk_size=chunk_size)
- with Timeout(conn_timeout):
- conn = http_connect(node['ip'], node['port'], node['device'], part,
- 'PUT', path, headers=gen_headers(headers, add_ts))
-
- contents_f = FileLikeIter(contents)
-
- if content_length is None:
- chunk = contents_f.read(chunk_size)
- while chunk:
- conn.send('%x\r\n%s\r\n' % (len(chunk), chunk))
- chunk = contents_f.read(chunk_size)
- conn.send('0\r\n\r\n')
- else:
- left = content_length
- while left > 0:
- size = chunk_size
- if size > left:
- size = left
- chunk = contents_f.read(size)
- if not chunk:
- break
- conn.send(chunk)
- left -= len(chunk)
-
- with Timeout(response_timeout):
- resp = conn.getresponse()
- resp.read()
- if not is_success(resp.status):
- raise DirectClientException('Object', 'PUT',
- node, part, path, resp)
return resp.getheader('etag').strip('"')
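A hedged example of the new direct_put_container() helper; the node dict and partition would normally come from the container ring and are invented here. A timestamp and user-agent header are added automatically unless the caller supplies them.

    from swift.common.direct_client import direct_put_container

    node = {'ip': '127.0.0.1', 'port': 6201, 'device': 'sda1'}  # illustrative
    direct_put_container(node, 42, 'AUTH_test', 'mycontainer',
                         headers={'X-Container-Meta-Color': 'blue'})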
diff --git a/swift/common/manager.py b/swift/common/manager.py
index 330f8310f4..71f9e689b3 100644
--- a/swift/common/manager.py
+++ b/swift/common/manager.py
@@ -34,7 +34,7 @@ PROC_DIR = '/proc'
ALL_SERVERS = ['account-auditor', 'account-server', 'container-auditor',
'container-replicator', 'container-reconciler',
- 'container-server', 'container-sync',
+ 'container-server', 'container-sharder', 'container-sync',
'container-updater', 'object-auditor', 'object-server',
'object-expirer', 'object-replicator',
'object-reconstructor', 'object-updater',
@@ -637,13 +637,16 @@ class Server(object):
{'server': self.server, 'pid': pid, 'conf': conf_file})
return 0
- def spawn(self, conf_file, once=False, wait=True, daemon=True, **kwargs):
+ def spawn(self, conf_file, once=False, wait=True, daemon=True,
+ additional_args=None, **kwargs):
"""Launch a subprocess for this server.
:param conf_file: path to conf_file to use as first arg
:param once: boolean, add once argument to command
:param wait: boolean, if true capture stdout with a pipe
:param daemon: boolean, if false ask server to log to console
+ :param additional_args: list of additional arguments to pass
+ on the command line
:returns: the pid of the spawned process
"""
@@ -653,6 +656,10 @@ class Server(object):
if not daemon:
# ask the server to log to console
args.append('verbose')
+ if additional_args:
+ if isinstance(additional_args, str):
+ additional_args = [additional_args]
+ args.extend(additional_args)
# figure out what we're going to do with stdio
if not daemon:
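A hedged sketch of the new additional_args hook on Server.spawn(), which lets probe tests pass extra command-line options through to a one-shot daemon run; the conf path and device name are illustrative.

    from swift.common.manager import Server

    sharder = Server('container-sharder')
    sharder.spawn('/etc/swift/container-server/1.conf', once=True,
                  additional_args=['--devices=sda1'])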
diff --git a/swift/common/utils.py b/swift/common/utils.py
index 54efdf2b18..048e64d65d 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -19,10 +19,12 @@ from __future__ import print_function
import base64
import binascii
+import bisect
import collections
import errno
import fcntl
import grp
+import hashlib
import hmac
import json
import math
@@ -76,6 +78,7 @@ from six.moves import range, http_client
from six.moves.urllib.parse import ParseResult
from six.moves.urllib.parse import quote as _quote
from six.moves.urllib.parse import urlparse as stdlib_urlparse
+from six import string_types
from swift import gettext_ as _
import swift.common.exceptions
@@ -409,6 +412,21 @@ def config_positive_int_value(value):
return result
+def config_float_value(value, minimum=None, maximum=None):
+ try:
+ val = float(value)
+ if minimum is not None and val < minimum:
+ raise ValueError()
+ if maximum is not None and val > maximum:
+ raise ValueError()
+ return val
+ except (TypeError, ValueError):
+ min_ = ', greater than %s' % minimum if minimum is not None else ''
+ max_ = ', less than %s' % maximum if maximum is not None else ''
+ raise ValueError('Config option must be a number%s%s, not "%s".' %
+ (min_, max_, value))
+
+
def config_auto_int_value(value, default):
"""
Returns default if value is None or 'auto'.
@@ -4370,6 +4388,553 @@ def get_md5_socket():
return md5_sockfd
+class ShardRange(object):
+ """
+ A ShardRange encapsulates sharding state related to a container including
+ lower and upper bounds that define the object namespace for which the
+ container is responsible.
+
+ Shard ranges may be persisted in a container database. Timestamps
+ associated with subsets of the shard range attributes are used to resolve
+ conflicts when a shard range needs to be merged with an existing shard
+ range record and the most recent version of an attribute should be
+ persisted.
+
+ :param name: the name of the shard range; this should take the form of a
+ path to a container i.e. <account>/<container>.
+ :param timestamp: a timestamp that represents the time at which the
+ shard range's ``lower``, ``upper`` or ``deleted`` attributes were
+ last modified.
+ :param lower: the lower bound of object names contained in the shard range;
+ the lower bound *is not* included in the shard range namespace.
+ :param upper: the upper bound of object names contained in the shard range;
+ the upper bound *is* included in the shard range namespace.
+ :param object_count: the number of objects in the shard range; defaults to
+ zero.
+ :param bytes_used: the number of bytes in the shard range; defaults to
+ zero.
+ :param meta_timestamp: a timestamp that represents the time at which the
+ shard range's ``object_count`` and ``bytes_used`` were last updated;
+ defaults to the value of ``timestamp``.
+ :param deleted: a boolean; if True the shard range is considered to be
+ deleted.
+ :param state: the state; must be one of ShardRange.STATES; defaults to
+ CREATED.
+ :param state_timestamp: a timestamp that represents the time at which
+ ``state`` was forced to its current value; defaults to the value of
+ ``timestamp``. This timestamp is typically not updated with every
+ change of ``state`` because in general conflicts in ``state``
+ attributes are resolved by choosing the larger ``state`` value.
+ However, when this rule does not apply, for example when changing state
+ from ``SHARDED`` to ``ACTIVE``, the ``state_timestamp`` may be advanced
+ so that the new ``state`` value is preferred over any older ``state``
+ value.
+ :param epoch: optional epoch timestamp which represents the time at which
+ sharding was enabled for a container.
+ """
+ FOUND = 10
+ CREATED = 20
+ CLEAVED = 30
+ ACTIVE = 40
+ SHRINKING = 50
+ SHARDING = 60
+ SHARDED = 70
+ STATES = {FOUND: 'found',
+ CREATED: 'created',
+ CLEAVED: 'cleaved',
+ ACTIVE: 'active',
+ SHRINKING: 'shrinking',
+ SHARDING: 'sharding',
+ SHARDED: 'sharded'}
+ STATES_BY_NAME = dict((v, k) for k, v in STATES.items())
+
+ class OuterBound(object):
+ def __eq__(self, other):
+ return isinstance(other, type(self))
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __str__(self):
+ return ''
+
+ def __repr__(self):
+ return type(self).__name__
+
+ def __bool__(self):
+ return False
+
+ __nonzero__ = __bool__
+
+ @functools.total_ordering
+ class MaxBound(OuterBound):
+ def __ge__(self, other):
+ return True
+
+ @functools.total_ordering
+ class MinBound(OuterBound):
+ def __le__(self, other):
+ return True
+
+ MIN = MinBound()
+ MAX = MaxBound()
+
+ def __init__(self, name, timestamp, lower=MIN, upper=MAX,
+ object_count=0, bytes_used=0, meta_timestamp=None,
+ deleted=False, state=None, state_timestamp=None, epoch=None):
+ self.account = self.container = self._timestamp = \
+ self._meta_timestamp = self._state_timestamp = self._epoch = None
+ self._lower = ShardRange.MIN
+ self._upper = ShardRange.MAX
+ self._deleted = False
+ self._state = None
+
+ self.name = name
+ self.timestamp = timestamp
+ self.lower = lower
+ self.upper = upper
+ self.deleted = deleted
+ self.object_count = object_count
+ self.bytes_used = bytes_used
+ self.meta_timestamp = meta_timestamp
+ self.state = self.FOUND if state is None else state
+ self.state_timestamp = state_timestamp
+ self.epoch = epoch
+
+ @classmethod
+ def _encode(cls, value):
+ if six.PY2 and isinstance(value, six.text_type):
+ return value.encode('utf-8')
+ return value
+
+ def _encode_bound(self, bound):
+ if isinstance(bound, ShardRange.OuterBound):
+ return bound
+ if not isinstance(bound, string_types):
+ raise TypeError('must be a string type')
+ return self._encode(bound)
+
+ @classmethod
+ def _make_container_name(cls, root_container, parent_container, timestamp,
+ index):
+ if not isinstance(parent_container, bytes):
+ parent_container = parent_container.encode('utf-8')
+ return "%s-%s-%s-%s" % (root_container,
+ hashlib.md5(parent_container).hexdigest(),
+ cls._to_timestamp(timestamp).internal,
+ index)
+
+ @classmethod
+ def make_path(cls, shards_account, root_container, parent_container,
+ timestamp, index):
+ """
+ Returns a path for a shard container that is valid to use as a name
+ when constructing a :class:`~swift.common.utils.ShardRange`.
+
+ :param shards_account: the hidden internal account to which the shard
+ container belongs.
+ :param root_container: the name of the root container for the shard.
+ :param parent_container: the name of the parent container for the
+ shard; for initial first generation shards this should be the same
+ as ``root_container``; for shards of shards this should be the name
+ of the sharding shard container.
+ :param timestamp: an instance of :class:`~swift.common.utils.Timestamp`
+ :param index: a unique index that will distinguish the path from any
+ other path generated using the same combination of
+ ``shards_account``, ``root_container``, ``parent_container`` and
+ ``timestamp``.
+ :return: a string of the form <shards_account>/<shard_container>
+ """
+ shard_container = cls._make_container_name(
+ root_container, parent_container, timestamp, index)
+ return '%s/%s' % (shards_account, shard_container)
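+
+ # An illustrative call; the '.shards_a' account name and the index value
+ # are assumed example values, not requirements of this method:
+ #
+ #   ShardRange.make_path('.shards_a', 'root_c', 'root_c', Timestamp.now(), 3)
+ #   # -> '.shards_a/root_c-<md5 of parent name>-<timestamp.internal>-3'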
+
+ @classmethod
+ def _to_timestamp(cls, timestamp):
+ if timestamp is None or isinstance(timestamp, Timestamp):
+ return timestamp
+ return Timestamp(timestamp)
+
+ @property
+ def name(self):
+ return '%s/%s' % (self.account, self.container)
+
+ @name.setter
+ def name(self, path):
+ path = self._encode(path)
+ if not path or len(path.split('/')) != 2 or not all(path.split('/')):
+ raise ValueError(
+ "Name must be of the form '/', got %r" %
+ path)
+ self.account, self.container = path.split('/')
+
+ @property
+ def timestamp(self):
+ return self._timestamp
+
+ @timestamp.setter
+ def timestamp(self, ts):
+ if ts is None:
+ raise TypeError('timestamp cannot be None')
+ self._timestamp = self._to_timestamp(ts)
+
+ @property
+ def meta_timestamp(self):
+ if self._meta_timestamp is None:
+ return self.timestamp
+ return self._meta_timestamp
+
+ @meta_timestamp.setter
+ def meta_timestamp(self, ts):
+ self._meta_timestamp = self._to_timestamp(ts)
+
+ @property
+ def lower(self):
+ return self._lower
+
+ @property
+ def lower_str(self):
+ return str(self.lower)
+
+ @lower.setter
+ def lower(self, value):
+ if value in (None, ''):
+ value = ShardRange.MIN
+ try:
+ value = self._encode_bound(value)
+ except TypeError as err:
+ raise TypeError('lower %s' % err)
+ if value > self._upper:
+ raise ValueError(
+ 'lower (%r) must be less than or equal to upper (%r)' %
+ (value, self.upper))
+ self._lower = value
+
+ @property
+ def end_marker(self):
+ return self.upper_str + '\x00' if self.upper else ''
+
+ @property
+ def upper(self):
+ return self._upper
+
+ @property
+ def upper_str(self):
+ return str(self.upper)
+
+ @upper.setter
+ def upper(self, value):
+ if value in (None, ''):
+ value = ShardRange.MAX
+ try:
+ value = self._encode_bound(value)
+ except TypeError as err:
+ raise TypeError('upper %s' % err)
+ if value < self._lower:
+ raise ValueError(
+ 'upper (%r) must be greater than or equal to lower (%r)' %
+ (value, self.lower))
+ self._upper = value
+
+ @property
+ def object_count(self):
+ return self._count
+
+ @object_count.setter
+ def object_count(self, count):
+ count = int(count)
+ if count < 0:
+ raise ValueError('object_count cannot be < 0')
+ self._count = count
+
+ @property
+ def bytes_used(self):
+ return self._bytes
+
+ @bytes_used.setter
+ def bytes_used(self, bytes_used):
+ bytes_used = int(bytes_used)
+ if bytes_used < 0:
+ raise ValueError('bytes_used cannot be < 0')
+ self._bytes = bytes_used
+
+ def update_meta(self, object_count, bytes_used, meta_timestamp=None):
+ """
+ Set the object stats metadata to the given values and update the
+ meta_timestamp to the current time.
+
+ :param object_count: should be an integer
+ :param bytes_used: should be an integer
+ :param meta_timestamp: timestamp for metadata; if not given the
+ current time will be set.
+ :raises ValueError: if ``object_count`` or ``bytes_used`` cannot be
+ cast to an int, or if meta_timestamp is neither None nor can be
+ cast to a :class:`~swift.common.utils.Timestamp`.
+ """
+ self.object_count = int(object_count)
+ self.bytes_used = int(bytes_used)
+ if meta_timestamp is None:
+ self.meta_timestamp = Timestamp.now()
+ else:
+ self.meta_timestamp = meta_timestamp
+
+ def increment_meta(self, object_count, bytes_used):
+ """
+ Increment the object stats metadata by the given values and update the
+ meta_timestamp to the current time.
+
+ :param object_count: should be an integer
+ :param bytes_used: should be an integer
+ :raises ValueError: if ``object_count`` or ``bytes_used`` cannot be
+ cast to an int.
+ """
+ self.update_meta(self.object_count + int(object_count),
+ self.bytes_used + int(bytes_used))
+
+ @classmethod
+ def resolve_state(cls, state):
+ """
+ Given a value that may be either the name or the number of a state
+ return a tuple of (state number, state name).
+
+ :param state: Either a string state name or an integer state number.
+ :return: A tuple (state number, state name)
+ :raises ValueError: if ``state`` is neither a valid state name nor a
+ valid state number.
+ """
+ try:
+ state = state.lower()
+ state_num = cls.STATES_BY_NAME[state]
+ except (KeyError, AttributeError):
+ try:
+ state_name = cls.STATES[state]
+ except KeyError:
+ raise ValueError('Invalid state %r' % state)
+ else:
+ state_num = state
+ else:
+ state_name = state
+ return state_num, state_name
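+
+ # For example, using the STATES mapping defined above:
+ #
+ #   ShardRange.resolve_state('active')  # -> (40, 'active')
+ #   ShardRange.resolve_state(10)        # -> (10, 'found')
+ #   ShardRange.resolve_state('bogus')   # raises ValueError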
+
+ @property
+ def state(self):
+ return self._state
+
+ @state.setter
+ def state(self, state):
+ try:
+ float_state = float(state)
+ int_state = int(float_state)
+ except (ValueError, TypeError):
+ raise ValueError('Invalid state %r' % state)
+ if int_state != float_state or int_state not in self.STATES:
+ raise ValueError('Invalid state %r' % state)
+ self._state = int_state
+
+ @property
+ def state_text(self):
+ return self.STATES[self.state]
+
+ @property
+ def state_timestamp(self):
+ if self._state_timestamp is None:
+ return self.timestamp
+ return self._state_timestamp
+
+ @state_timestamp.setter
+ def state_timestamp(self, ts):
+ self._state_timestamp = self._to_timestamp(ts)
+
+ @property
+ def epoch(self):
+ return self._epoch
+
+ @epoch.setter
+ def epoch(self, epoch):
+ self._epoch = self._to_timestamp(epoch)
+
+ def update_state(self, state, state_timestamp=None):
+ """
+ Set state to the given value and optionally update the state_timestamp
+ to the given time.
+
+ :param state: new state, should be an integer
+ :param state_timestamp: timestamp for state; if not given the
+ state_timestamp will not be changed.
+ :return: True if the state or state_timestamp was changed, False
+ otherwise
+ """
+ if state_timestamp is None and self.state == state:
+ return False
+ self.state = state
+ if state_timestamp is not None:
+ self.state_timestamp = state_timestamp
+ return True
+
+ @property
+ def deleted(self):
+ return self._deleted
+
+ @deleted.setter
+ def deleted(self, value):
+ self._deleted = bool(value)
+
+ def set_deleted(self, timestamp=None):
+ """
+ Mark the shard range deleted and set timestamp to the current time.
+
+ :param timestamp: optional timestamp to set; if not given the
+ current time will be set.
+ :return: True if the deleted attribute or timestamp was changed, False
+ otherwise
+ """
+ if timestamp is None and self.deleted:
+ return False
+ self.deleted = True
+ self.timestamp = timestamp or Timestamp.now()
+ return True
+
+ def __contains__(self, item):
+ # test if the given item is within the namespace
+ if item == '':
+ return False
+ item = self._encode_bound(item)
+ return self.lower < item <= self.upper
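+
+ # A quick sketch of the bound semantics (the name and bounds below are
+ # illustrative): the lower bound is excluded, the upper bound is included.
+ #
+ #   sr = ShardRange('a/c', Timestamp.now(), lower='b', upper='d')
+ #   'b' in sr   # False
+ #   'c' in sr   # True
+ #   'd' in sr   # True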
+
+ def __lt__(self, other):
+ # a ShardRange is less than other if its entire namespace is less than
+ # other; if other is another ShardRange that implies that this
+ # ShardRange's upper must be less than or equal to the other
+ # ShardRange's lower
+ if self.upper == ShardRange.MAX:
+ return False
+ if isinstance(other, ShardRange):
+ return self.upper <= other.lower
+ elif other is None:
+ return True
+ else:
+ return self.upper < other
+
+ def __gt__(self, other):
+ # a ShardRange is greater than other if its entire namespace is greater
+ # than other; if other is another ShardRange that implies that this
+ # ShardRange's lower must be greater than or equal to the other
+ # ShardRange's upper
+ if self.lower == ShardRange.MIN:
+ return False
+ if isinstance(other, ShardRange):
+ return self.lower >= other.upper
+ elif other is None:
+ return False
+ else:
+ return self.lower >= other
+
+ def __eq__(self, other):
+ # test for equality of range bounds only
+ if not isinstance(other, ShardRange):
+ return False
+ return self.lower == other.lower and self.upper == other.upper
+
+ def __ne__(self, other):
+ return not (self == other)
+
+ def __repr__(self):
+ return '%s<%r to %r as of %s, (%d, %d) as of %s, %s as of %s>' % (
+ self.__class__.__name__, self.lower, self.upper,
+ self.timestamp.internal, self.object_count, self.bytes_used,
+ self.meta_timestamp.internal, self.state_text,
+ self.state_timestamp.internal)
+
+ def entire_namespace(self):
+ """
+ Returns True if the ShardRange includes the entire namespace, False
+ otherwise.
+ """
+ return (self.lower == ShardRange.MIN and
+ self.upper == ShardRange.MAX)
+
+ def overlaps(self, other):
+ """
+ Returns True if the ShardRange namespace overlaps with the other
+ ShardRange's namespace.
+
+ :param other: an instance of :class:`~swift.common.utils.ShardRange`
+ """
+ if not isinstance(other, ShardRange):
+ return False
+ return max(self.lower, other.lower) < min(self.upper, other.upper)
+
+ def includes(self, other):
+ """
+ Returns True if this namespace includes the whole of the other
+ namespace, False otherwise.
+
+ :param other: an instance of :class:`~swift.common.utils.ShardRange`
+ """
+ return (self.lower <= other.lower) and (other.upper <= self.upper)
+
+ def __iter__(self):
+ yield 'name', self.name
+ yield 'timestamp', self.timestamp.internal
+ yield 'lower', str(self.lower)
+ yield 'upper', str(self.upper)
+ yield 'object_count', self.object_count
+ yield 'bytes_used', self.bytes_used
+ yield 'meta_timestamp', self.meta_timestamp.internal
+ yield 'deleted', 1 if self.deleted else 0
+ yield 'state', self.state
+ yield 'state_timestamp', self.state_timestamp.internal
+ yield 'epoch', self.epoch.internal if self.epoch is not None else None
+
+ def copy(self, timestamp=None, **kwargs):
+ """
+ Creates a copy of the ShardRange.
+
+ :param timestamp: (optional) If given, the returned ShardRange will
+ have all of its timestamps set to this value. Otherwise the
+ returned ShardRange will have the original timestamps.
+ :return: an instance of :class:`~swift.common.utils.ShardRange`
+ """
+ new = ShardRange.from_dict(dict(self, **kwargs))
+ if timestamp:
+ new.timestamp = timestamp
+ new.meta_timestamp = new.state_timestamp = None
+ return new
+
+ @classmethod
+ def from_dict(cls, params):
+ """
+ Return an instance constructed using the given dict of params. This
+ method is deliberately less flexible than the class `__init__()` method
+ and requires all of the `__init__()` args to be given in the dict of
+ params.
+
+ :param params: a dict of parameters
+ :return: an instance of this class
+ """
+ return cls(
+ params['name'], params['timestamp'], params['lower'],
+ params['upper'], params['object_count'], params['bytes_used'],
+ params['meta_timestamp'], params['deleted'], params['state'],
+ params['state_timestamp'], params['epoch'])
+
+
+def find_shard_range(item, ranges):
+ """
+ Find a ShardRange in the given list of ``ranges`` whose namespace
+ contains ``item``.
+
+ :param item: The item for which a ShardRange is to be found.
+ :param ranges: a sorted list of ShardRanges.
+ :return: the ShardRange whose namespace contains ``item``, or None if
+ no suitable range is found.
+ """
+ index = bisect.bisect_left(ranges, item)
+ if index != len(ranges) and item in ranges[index]:
+ return ranges[index]
+ return None
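+
+# A minimal sketch of how find_shard_range might be used, assuming ``ts`` is
+# a Timestamp and the names and bounds are illustrative:
+#
+#   ranges = [ShardRange('a/c-0', ts, '', 'm'),
+#             ShardRange('a/c-1', ts, 'm', '')]   # sorted by upper bound
+#   find_shard_range('koala', ranges)  # -> the (MIN, 'm'] range
+#   find_shard_range('panda', ranges)  # -> the ('m', MAX] range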
+
+
def modify_priority(conf, logger):
"""
Modify priority by nice and ionice.
@@ -4750,3 +5315,110 @@ def distribute_evenly(items, num_buckets):
for index, item in enumerate(items):
out[index % num_buckets].append(item)
return out
+
+
+def get_redirect_data(response):
+ """
+ Extract a redirect location from a response's headers.
+
+ :param response: a response
+ :return: a tuple of (path, Timestamp) if a Location header is found,
+ otherwise None
+ :raises ValueError: if the Location header is found but an
+ X-Backend-Redirect-Timestamp header is not found, or if there is a
+ problem with the format of either header
+ """
+ headers = HeaderKeyDict(response.getheaders())
+ if 'Location' not in headers:
+ return None
+ location = urlparse(headers['Location']).path
+ account, container, _junk = split_path(location, 2, 3, True)
+ timestamp_val = headers.get('X-Backend-Redirect-Timestamp')
+ try:
+ timestamp = Timestamp(timestamp_val)
+ except (TypeError, ValueError):
+ raise ValueError('Invalid timestamp value: %s' % timestamp_val)
+ return '%s/%s' % (account, container), timestamp
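+
+# For example, assuming the Location header path has the form
+# /<account>/<container> (e.g. '/.shards_a/c_shard') and the response also
+# carries X-Backend-Redirect-Timestamp: 1234567890.12345, this returns
+# ('.shards_a/c_shard', Timestamp('1234567890.12345')).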
+
+
+def parse_db_filename(filename):
+ """
+ Splits a db filename into three parts: the hash, the epoch, and the
+ extension.
+
+ >>> parse_db_filename("ab2134.db")
+ ('ab2134', None, '.db')
+ >>> parse_db_filename("ab2134_1234567890.12345.db")
+ ('ab2134', '1234567890.12345', '.db')
+
+ :param filename: A db file basename or path to a db file.
+ :return: A tuple of (hash, epoch, extension). ``epoch`` may be None.
+ :raises ValueError: if ``filename`` is not a path to a file.
+ """
+ filename = os.path.basename(filename)
+ if not filename:
+ raise ValueError('Path to a file required.')
+ name, ext = os.path.splitext(filename)
+ parts = name.split('_')
+ hash_ = parts.pop(0)
+ epoch = parts[0] if parts else None
+ return hash_, epoch, ext
+
+
+def make_db_file_path(db_path, epoch):
+ """
+ Given a path to a db file, return a modified path whose filename part has
+ the given epoch.
+
+ A db filename takes the form ``<hash>[_<epoch>].db``; this method replaces
+ the ``<epoch>`` part of the given ``db_path`` with the given ``epoch``
+ value.
+
+ :param db_path: Path to a db file that does not necessarily exist.
+ :param epoch: A string that will be used as the epoch in the new path's
+ filename; the value will be normalized to the normal string
+ representation of a :class:`~swift.common.utils.Timestamp`.
+ :return: A modified path to a db file.
+ :raises ValueError: if the ``epoch`` is not valid for constructing a
+ :class:`~swift.common.utils.Timestamp`.
+ """
+ if epoch is None:
+ raise ValueError('epoch must not be None')
+ epoch = Timestamp(epoch).normal
+ hash_, _, ext = parse_db_filename(db_path)
+ db_dir = os.path.dirname(db_path)
+ return os.path.join(db_dir, '%s_%s%s' % (hash_, epoch, ext))
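+
+# For example (the directory part of the path is illustrative):
+#
+#   make_db_file_path('/path/to/abc123.db', '1234567890.12345')
+#   # -> '/path/to/abc123_1234567890.12345.db'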
+
+
+def get_db_files(db_path):
+ """
+ Given the path to a db file, return a sorted list of all valid db files
+ that actually exist in that path's dir. A valid db filename has the form:
+
+ <hash>[_<epoch>].db
+
+ where <hash> matches the <hash> part of the given db_path as would be
+ parsed by :meth:`~swift.common.utils.parse_db_filename`.
+
+ :param db_path: Path to a db file that does not necessarily exist.
+ :return: List of valid db files that do exist in the dir of the
+ ``db_path``. This list may be empty.
+ """
+ db_dir, db_file = os.path.split(db_path)
+ try:
+ files = os.listdir(db_dir)
+ except OSError as err:
+ if err.errno == errno.ENOENT:
+ return []
+ raise
+ if not files:
+ return []
+ match_hash, epoch, ext = parse_db_filename(db_file)
+ results = []
+ for f in files:
+ hash_, epoch, ext = parse_db_filename(f)
+ if ext != '.db':
+ continue
+ if hash_ != match_hash:
+ continue
+ results.append(os.path.join(db_dir, f))
+ return sorted(results)
diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py
index 752e8767aa..2a9409d92e 100644
--- a/swift/common/wsgi.py
+++ b/swift/common/wsgi.py
@@ -45,6 +45,9 @@ from swift.common.utils import capture_stdio, disable_fallocate, \
validate_configuration, get_hub, config_auto_int_value, \
reiterate
+SIGNUM_TO_NAME = {getattr(signal, n): n for n in dir(signal)
+ if n.startswith('SIG') and '_' not in n}
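+# e.g. SIGNUM_TO_NAME[signal.SIGTERM] == 'SIGTERM'; names containing an
+# underscore (SIG_IGN, SIG_DFL, SIG_SETMASK, ...) are filtered out.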
+
# Set maximum line size of message headers to be accepted.
wsgi.MAX_HEADER_LINE = constraints.MAX_HEADER_SIZE
@@ -559,7 +562,8 @@ class WorkersStrategy(object):
:param int pid: The new worker process' PID
"""
- self.logger.notice('Started child %s' % pid)
+ self.logger.notice('Started child %s from parent %s',
+ pid, os.getpid())
self.children.append(pid)
def register_worker_exit(self, pid):
@@ -569,7 +573,8 @@ class WorkersStrategy(object):
:param int pid: The PID of the worker that exited.
"""
- self.logger.error('Removing dead child %s' % pid)
+ self.logger.error('Removing dead child %s from parent %s',
+ pid, os.getpid())
self.children.remove(pid)
def shutdown_sockets(self):
@@ -935,24 +940,17 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
run_server(conf, logger, no_fork_sock, global_conf=global_conf)
return 0
- def kill_children(*args):
- """Kills the entire process group."""
- logger.error('SIGTERM received')
- signal.signal(signal.SIGTERM, signal.SIG_IGN)
- running[0] = False
- os.killpg(0, signal.SIGTERM)
+ def stop_with_signal(signum, *args):
+ """Set running flag to False and capture the signum"""
+ running_context[0] = False
+ running_context[1] = signum
- def hup(*args):
- """Shuts down the server, but allows running requests to complete"""
- logger.error('SIGHUP received')
- signal.signal(signal.SIGHUP, signal.SIG_IGN)
- running[0] = False
+ # context to hold boolean running state and stop signum
+ running_context = [True, None]
+ signal.signal(signal.SIGTERM, stop_with_signal)
+ signal.signal(signal.SIGHUP, stop_with_signal)
- running = [True]
- signal.signal(signal.SIGTERM, kill_children)
- signal.signal(signal.SIGHUP, hup)
-
- while running[0]:
+ while running_context[0]:
for sock, sock_info in strategy.new_worker_socks():
pid = os.fork()
if pid == 0:
@@ -992,11 +990,23 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
sleep(0.01)
except KeyboardInterrupt:
logger.notice('User quit')
- running[0] = False
+ running_context[0] = False
break
+ if running_context[1] is not None:
+ try:
+ signame = SIGNUM_TO_NAME[running_context[1]]
+ except KeyError:
+ logger.error('Stopping with unexpected signal %r' %
+ running_context[1])
+ else:
+ logger.error('%s received', signame)
+ if running_context[1] == signal.SIGTERM:
+ os.killpg(0, signal.SIGTERM)
+
strategy.shutdown_sockets()
- logger.notice('Exited')
+ signal.signal(signal.SIGTERM, signal.SIG_IGN)
+ logger.notice('Exited (%s)', os.getpid())
return 0
diff --git a/swift/container/backend.py b/swift/container/backend.py
index bab618286a..040b79ad0b 100644
--- a/swift/container/backend.py
+++ b/swift/container/backend.py
@@ -15,6 +15,7 @@
"""
Pluggable Back-ends for Container Server
"""
+import errno
import os
from uuid import uuid4
@@ -23,16 +24,45 @@ import six
import six.moves.cPickle as pickle
from six.moves import range
import sqlite3
+from eventlet import tpool
+from swift.common.constraints import CONTAINER_LISTING_LIMIT
+from swift.common.exceptions import LockTimeout
from swift.common.utils import Timestamp, encode_timestamps, \
- decode_timestamps, extract_swift_bytes
-from swift.common.db import DatabaseBroker, utf8encode
-
+ decode_timestamps, extract_swift_bytes, storage_directory, hash_path, \
+ ShardRange, renamer, find_shard_range, MD5_OF_EMPTY_STRING, mkdirs, \
+ get_db_files, parse_db_filename, make_db_file_path, split_path
+from swift.common.db import DatabaseBroker, utf8encode, BROKER_TIMEOUT, \
+ zero_like, DatabaseAlreadyExists
SQLITE_ARG_LIMIT = 999
DATADIR = 'containers'
+RECORD_TYPE_OBJECT = 'object'
+RECORD_TYPE_SHARD = 'shard'
+SHARD_RANGE_TABLE = 'shard_range'
+
+NOTFOUND = 'not_found'
+UNSHARDED = 'unsharded'
+SHARDING = 'sharding'
+SHARDED = 'sharded'
+COLLAPSED = 'collapsed'
+
+
+SHARD_STATS_STATES = [ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING]
+SHARD_LISTING_STATES = SHARD_STATS_STATES + [ShardRange.CLEAVED]
+SHARD_UPDATE_STATES = [ShardRange.CREATED, ShardRange.CLEAVED,
+ ShardRange.ACTIVE, ShardRange.SHARDING]
+
+
+# attribute names in order used when transforming shard ranges from dicts to
+# tuples and vice-versa
+SHARD_RANGE_KEYS = ('name', 'timestamp', 'lower', 'upper', 'object_count',
+ 'bytes_used', 'meta_timestamp', 'deleted', 'state',
+ 'state_timestamp', 'epoch')
+
POLICY_STAT_TABLE_CREATE = '''
CREATE TABLE policy_stat (
storage_policy_index INTEGER PRIMARY KEY,
@@ -221,12 +251,220 @@ def update_new_item_from_existing(new_item, existing):
return any(newer_than_existing)
+def merge_shards(shard_data, existing):
+ """
+ Compares ``shard_data`` with ``existing`` and updates ``shard_data`` with
+ any items of ``existing`` that take precedence over the corresponding item
+ in ``shard_data``.
+
+ :param shard_data: a dict representation of shard range that may be
+ modified by this method.
+ :param existing: a dict representation of shard range.
+ :returns: True if ``shard_data`` has any item(s) that are considered to
+ take precedence over the corresponding item in ``existing``
+ """
+ if not existing:
+ return True
+ if existing['timestamp'] < shard_data['timestamp']:
+ # note that currently we do not roll forward any meta or state from
+ # an item that was created at older time, newer created time trumps
+ return True
+ elif existing['timestamp'] > shard_data['timestamp']:
+ return False
+
+ new_content = False
+ # timestamp must be the same, so preserve existing range bounds and deleted
+ for k in ('lower', 'upper', 'deleted'):
+ shard_data[k] = existing[k]
+
+ # now we need to look for meta data updates
+ if existing['meta_timestamp'] >= shard_data['meta_timestamp']:
+ for k in ('object_count', 'bytes_used', 'meta_timestamp'):
+ shard_data[k] = existing[k]
+ else:
+ new_content = True
+
+ if (existing['state_timestamp'] == shard_data['state_timestamp']
+ and shard_data['state'] > existing['state']):
+ new_content = True
+ elif existing['state_timestamp'] >= shard_data['state_timestamp']:
+ for k in ('state', 'state_timestamp', 'epoch'):
+ shard_data[k] = existing[k]
+ else:
+ new_content = True
+ return new_content
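+
+# A minimal illustration of the precedence rules above (dicts trimmed to the
+# relevant keys; t1 < t2 are timestamps):
+#
+#   existing   = {'timestamp': t1, 'meta_timestamp': t1, 'object_count': 0, ...}
+#   shard_data = {'timestamp': t1, 'meta_timestamp': t2, 'object_count': 9, ...}
+#   merge_shards(shard_data, existing)  # -> True; shard_data keeps its newer
+#                                       #    meta but inherits existing's
+#                                       #    lower/upper/deleted values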
+
+
class ContainerBroker(DatabaseBroker):
- """Encapsulates working with a container database."""
+ """
+ Encapsulates working with a container database.
+
+ Note that this may involve multiple on-disk DB files if the container
+ becomes sharded:
+
+ * :attr:`_db_file` is the path to the legacy container DB name, i.e.
+ ``<hash>.db``. This file should exist for an initialised broker that
+ has never been sharded, but will not exist once a container has been
+ sharded.
+ * :attr:`db_files` is a list of existing db files for the broker. This
+ list should have at least one entry for an initialised broker, and
+ should have two entries while a broker is in SHARDING state.
+ * :attr:`db_file` is the path to whichever db is currently authoritative
+ for the container. Depending on the container's state, this may not be
+ the same as the ``db_file`` argument given to :meth:`~__init__`, unless
+ ``force_db_file`` is True in which case :attr:`db_file` is always equal
+ to the ``db_file`` argument given to :meth:`~__init__`.
+ * :attr:`pending_file` is always equal to :attr:`_db_file` extended with
+ ``.pending``, i.e. ``<hash>.db.pending``.
+ """
db_type = 'container'
db_contains_type = 'object'
db_reclaim_timestamp = 'created_at'
+ def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None,
+ account=None, container=None, pending_timeout=None,
+ stale_reads_ok=False, skip_commits=False,
+ force_db_file=False):
+ self._init_db_file = db_file
+ if db_file == ':memory:':
+ base_db_file = db_file
+ else:
+ db_dir = os.path.dirname(db_file)
+ hash_, other, ext = parse_db_filename(db_file)
+ base_db_file = os.path.join(db_dir, hash_ + ext)
+ super(ContainerBroker, self).__init__(
+ base_db_file, timeout, logger, account, container, pending_timeout,
+ stale_reads_ok, skip_commits=skip_commits)
+ # the root account and container are populated on demand
+ self._root_account = self._root_container = None
+ self._force_db_file = force_db_file
+ self._db_files = None
+
+ @classmethod
+ def create_broker(cls, device_path, part, account, container, logger=None,
+ epoch=None, put_timestamp=None,
+ storage_policy_index=None):
+ """
+ Create a ContainerBroker instance. If the db doesn't exist, initialize
+ the db file.
+
+ :param device_path: device path
+ :param part: partition number
+ :param account: account name string
+ :param container: container name string
+ :param logger: a logger instance
+ :param epoch: a timestamp to include in the db filename
+ :param put_timestamp: initial timestamp if broker needs to be
+ initialized
+ :param storage_policy_index: the storage policy index
+ :return: a :class:`swift.container.backend.ContainerBroker` instance
+ """
+ hsh = hash_path(account, container)
+ db_dir = storage_directory(DATADIR, part, hsh)
+ db_path = os.path.join(device_path, db_dir, hsh + '.db')
+ if epoch:
+ db_path = make_db_file_path(db_path, epoch)
+ broker = ContainerBroker(db_path, account=account, container=container,
+ logger=logger)
+ if not os.path.exists(broker.db_file):
+ try:
+ broker.initialize(put_timestamp, storage_policy_index)
+ except DatabaseAlreadyExists:
+ pass
+ return broker
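+
+ # A usage sketch; the device path, partition and names are illustrative:
+ #
+ #   broker = ContainerBroker.create_broker(
+ #       '/srv/node/sda1', 0, 'AUTH_test', 'c',
+ #       put_timestamp=Timestamp.now().internal)
+ #   broker.db_file
+ #   # -> '/srv/node/sda1/containers/0/<suffix>/<hash>/<hash>.db'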
+
+ def get_db_state(self):
+ """
+ Returns the current state of on disk db files.
+ """
+ if self._db_file == ':memory:':
+ return UNSHARDED
+ if not self.db_files:
+ return NOTFOUND
+ if len(self.db_files) > 1:
+ return SHARDING
+ if self.db_epoch is None:
+ # never been sharded
+ return UNSHARDED
+ if self.db_epoch != self._own_shard_range().epoch:
+ return UNSHARDED
+ if not self.get_shard_ranges():
+ return COLLAPSED
+ return SHARDED
+
+ def sharding_initiated(self):
+ """
+ Returns True if a broker has shard range state that would be necessary
+ for sharding to have been initiated, False otherwise.
+ """
+ own_shard_range = self.get_own_shard_range()
+ if own_shard_range.state in (ShardRange.SHARDING,
+ ShardRange.SHRINKING,
+ ShardRange.SHARDED):
+ return bool(self.get_shard_ranges())
+ return False
+
+ def sharding_required(self):
+ """
+ Returns True if a broker has shard range state that would be necessary
+ for sharding to have been initiated but has not yet completed sharding,
+ False otherwise.
+ """
+ db_state = self.get_db_state()
+ return (db_state == SHARDING or
+ (db_state == UNSHARDED and self.sharding_initiated()))
+
+ def is_sharded(self):
+ return self.get_db_state() == SHARDED
+
+ def reload_db_files(self):
+ """
+ Reloads the cached list of valid on disk db files for this broker.
+ """
+ if self._db_file == ':memory:':
+ return
+ # reset connection so the next access will use the correct DB file
+ self.conn = None
+ self._db_files = get_db_files(self._init_db_file)
+
+ @property
+ def db_files(self):
+ """
+ Gets the cached list of valid db files that exist on disk for this
+ broker.
+
+ The cached list may be refreshed by calling
+ :meth:`~swift.container.backend.ContainerBroker.reload_db_files`.
+
+ :return: A list of paths to db files ordered by ascending epoch;
+ the list may be empty.
+ """
+ if not self._db_files:
+ self.reload_db_files()
+ return self._db_files
+
+ @property
+ def db_file(self):
+ """
+ Get the path to the primary db file for this broker. This is typically
+ the db file for the most recent sharding epoch. However, if no db files
+ exist on disk, or if ``force_db_file`` was True when the broker was
+ constructed, then the primary db file is the file passed to the broker
+ constructor.
+
+ :return: A path to a db file; the file does not necessarily exist.
+ """
+ if self._force_db_file:
+ return self._init_db_file
+ if self.db_files:
+ return self.db_files[-1]
+ return self._init_db_file
+
+ @property
+ def db_epoch(self):
+ hash_, epoch, ext = parse_db_filename(self.db_file)
+ return epoch
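+
+ # For example, while a container is in the SHARDING state db_files would
+ # typically be ['<hash>.db', '<hash>_<epoch>.db']; db_file then points at
+ # the epoch db and _db_file remains the legacy '<hash>.db' path.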
+
@property
def storage_policy_index(self):
if not hasattr(self, '_storage_policy_index'):
@@ -234,6 +472,11 @@ class ContainerBroker(DatabaseBroker):
self.get_info()['storage_policy_index']
return self._storage_policy_index
+ @property
+ def path(self):
+ self._populate_instance_cache()
+ return '%s/%s' % (self.account, self.container)
+
def _initialize(self, conn, put_timestamp, storage_policy_index):
"""
Create a brand new container database (tables, indices, triggers, etc.)
@@ -250,6 +493,8 @@ class ContainerBroker(DatabaseBroker):
self.create_policy_stat_table(conn, storage_policy_index)
self.create_container_info_table(conn, put_timestamp,
storage_policy_index)
+ self.create_shard_range_table(conn)
+ self._db_files = None
def create_object_table(self, conn):
"""
@@ -331,6 +576,40 @@ class ContainerBroker(DatabaseBroker):
VALUES (?)
""", (storage_policy_index,))
+ def create_shard_range_table(self, conn):
+ """
+ Create the shard_range table which is specific to the container DB.
+
+ :param conn: DB connection object
+ """
+ # Use execute (not executescript) so we get the benefits of our
+ # GreenDBConnection. Creating a table requires a whole-DB lock;
+ # *any* in-progress cursor will otherwise trip a "database is locked"
+ # error.
+ conn.execute("""
+ CREATE TABLE %s (
+ ROWID INTEGER PRIMARY KEY AUTOINCREMENT,
+ name TEXT,
+ timestamp TEXT,
+ lower TEXT,
+ upper TEXT,
+ object_count INTEGER DEFAULT 0,
+ bytes_used INTEGER DEFAULT 0,
+ meta_timestamp TEXT,
+ deleted INTEGER DEFAULT 0,
+ state INTEGER,
+ state_timestamp TEXT,
+ epoch TEXT
+ );
+ """ % SHARD_RANGE_TABLE)
+
+ conn.execute("""
+ CREATE TRIGGER shard_range_update BEFORE UPDATE ON %s
+ BEGIN
+ SELECT RAISE(FAIL, 'UPDATE not allowed; DELETE and INSERT');
+ END;
+ """ % SHARD_RANGE_TABLE)
+
def get_db_version(self, conn):
if self._db_version == -1:
self._db_version = 0
@@ -340,6 +619,11 @@ class ContainerBroker(DatabaseBroker):
self._db_version = 1
return self._db_version
+ def _get_deleted_key(self, connection):
+ if self.get_db_version(connection) < 1:
+ return '+deleted'
+ return 'deleted'
+
def _newid(self, conn):
conn.execute('''
UPDATE container_stat
@@ -383,12 +667,7 @@ class ContainerBroker(DatabaseBroker):
'ctype_timestamp': content_type_timestamp,
'meta_timestamp': meta_timestamp})
- def empty(self):
- """
- Check if container DB is empty.
-
- :returns: True if the database has no active objects, False otherwise
- """
+ def _empty(self):
self._commit_puts_stale_ok()
with self.get() as conn:
try:
@@ -401,7 +680,27 @@ class ContainerBroker(DatabaseBroker):
raise
row = conn.execute(
'SELECT object_count from container_stat').fetchone()
- return (row[0] == 0)
+ return zero_like(row[0])
+
+ def empty(self):
+ """
+ Check if container DB is empty.
+
+ This method uses more stringent checks on object count than
+ :meth:`is_deleted`: this method checks that there are no objects in any
+ policy; if the container is in the process of sharding then both fresh
+ and retiring databases are checked to be empty; if a root container has
+ shard ranges then they are checked to be empty.
+
+ :returns: True if the database has no active objects, False otherwise
+ """
+ if not all(broker._empty() for broker in self.get_brokers()):
+ return False
+ if self.is_root_container() and self.sharding_initiated():
+ # sharded shards don't get updates from their shards so their shard
+ # usage should not be relied upon
+ return self.get_shard_usage()['object_count'] <= 0
+ return True
def delete_object(self, name, timestamp, storage_policy_index=0):
"""
@@ -447,6 +746,43 @@ class ContainerBroker(DatabaseBroker):
'meta_timestamp': meta_timestamp}
self.put_record(record)
+ def remove_objects(self, lower, upper, max_row=None):
+ """
+ Removes object records in the given namespace range from the object
+ table.
+
+ Note that objects are removed regardless of their storage_policy_index.
+
+ :param lower: defines the lower bound of object names that will be
+ removed; names greater than this value will be removed; names less
+ than or equal to this value will not be removed.
+ :param upper: defines the upper bound of object names that will be
+ removed; names less than or equal to this value will be removed;
+ names greater than this value will not be removed. The empty string
+ is interpreted as there being no upper bound.
+ :param max_row: if specified only rows less than or equal to max_row
+ will be removed
+ """
+ query_conditions = []
+ query_args = []
+ if max_row is not None:
+ query_conditions.append('ROWID <= ?')
+ query_args.append(str(max_row))
+ if lower:
+ query_conditions.append('name > ?')
+ query_args.append(lower)
+ if upper:
+ query_conditions.append('name <= ?')
+ query_args.append(upper)
+
+ query = 'DELETE FROM object WHERE deleted in (0, 1)'
+ if query_conditions:
+ query += ' AND ' + ' AND '.join(query_conditions)
+
+ with self.get() as conn:
+ conn.execute(query, query_args)
+ conn.commit()
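+
+ # For example, remove_objects('b', 'd', max_row=99) deletes rows with
+ # ROWID <= 99 whose names sort after 'b' and up to and including 'd';
+ # remove_objects('', '') deletes every object row regardless of policy.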
+
def _is_deleted_info(self, object_count, put_timestamp, delete_timestamp,
**kwargs):
"""
@@ -457,12 +793,17 @@ class ContainerBroker(DatabaseBroker):
# The container is considered deleted if the delete_timestamp
# value is greater than the put_timestamp, and there are no
# objects in the container.
- return (object_count in (None, '', 0, '0')) and (
+ return zero_like(object_count) and (
Timestamp(delete_timestamp) > Timestamp(put_timestamp))
def _is_deleted(self, conn):
"""
- Check container_stat view and evaluate info.
+ Check if the DB is considered to be deleted.
+
+ The object count used in this check is the same as the container
+ object count that would be returned in the result of :meth:`get_info`
+ and exposed to a client i.e. it is based on the container_stat view for
+ the current storage policy index or relevant shard range usage.
:param conn: database conn
@@ -471,8 +812,21 @@ class ContainerBroker(DatabaseBroker):
info = conn.execute('''
SELECT put_timestamp, delete_timestamp, object_count
FROM container_stat''').fetchone()
+ info = dict(info)
+ info.update(self._get_alternate_object_stats()[1])
return self._is_deleted_info(**info)
+ def is_reclaimable(self, now, reclaim_age):
+ with self.get() as conn:
+ info = conn.execute('''
+ SELECT put_timestamp, delete_timestamp
+ FROM container_stat''').fetchone()
+ if (Timestamp(now - reclaim_age) >
+ Timestamp(info['delete_timestamp']) >
+ Timestamp(info['put_timestamp'])):
+ return self.empty()
+ return False
+
def get_info_is_deleted(self):
"""
Get the is_deleted status and info for the container.
@@ -485,6 +839,73 @@ class ContainerBroker(DatabaseBroker):
info = self.get_info()
return info, self._is_deleted_info(**info)
+ def get_replication_info(self):
+ info = super(ContainerBroker, self).get_replication_info()
+ info['shard_max_row'] = self.get_max_row(SHARD_RANGE_TABLE)
+ return info
+
+ def _do_get_info_query(self, conn):
+ data = None
+ trailing_sync = 'x_container_sync_point1, x_container_sync_point2'
+ trailing_pol = 'storage_policy_index'
+ errors = set()
+ while not data:
+ try:
+ data = conn.execute(('''
+ SELECT account, container, created_at, put_timestamp,
+ delete_timestamp, status_changed_at,
+ object_count, bytes_used,
+ reported_put_timestamp, reported_delete_timestamp,
+ reported_object_count, reported_bytes_used, hash,
+ id, %s, %s
+ FROM container_stat
+ ''') % (trailing_sync, trailing_pol)).fetchone()
+ except sqlite3.OperationalError as err:
+ err_msg = str(err)
+ if err_msg in errors:
+ # only attempt migration once
+ raise
+ errors.add(err_msg)
+ if 'no such column: storage_policy_index' in err_msg:
+ trailing_pol = '0 AS storage_policy_index'
+ elif 'no such column: x_container_sync_point' in err_msg:
+ trailing_sync = '-1 AS x_container_sync_point1, ' \
+ '-1 AS x_container_sync_point2'
+ else:
+ raise
+ data = dict(data)
+ # populate instance cache
+ self._storage_policy_index = data['storage_policy_index']
+ self.account = data['account']
+ self.container = data['container']
+ return data
+
+ def _get_info(self):
+ self._commit_puts_stale_ok()
+ with self.get() as conn:
+ return self._do_get_info_query(conn)
+
+ def _populate_instance_cache(self, conn=None):
+ # load cached instance attributes from the database if necessary
+ if self.container is None:
+ if conn:
+ self._do_get_info_query(conn)
+ else:
+ with self.get() as conn:
+ self._do_get_info_query(conn)
+
+ def _get_alternate_object_stats(self):
+ state = self.get_db_state()
+ if state == SHARDING:
+ other_info = self.get_brokers()[0]._get_info()
+ stats = {'object_count': other_info['object_count'],
+ 'bytes_used': other_info['bytes_used']}
+ elif state == SHARDED and self.is_root_container():
+ stats = self.get_shard_usage()
+ else:
+ stats = {}
+ return state, stats
+
def get_info(self):
"""
Get global data for the container.
@@ -494,44 +915,14 @@ class ContainerBroker(DatabaseBroker):
object_count, bytes_used, reported_put_timestamp,
reported_delete_timestamp, reported_object_count,
reported_bytes_used, hash, id, x_container_sync_point1,
- x_container_sync_point2, and storage_policy_index.
+ x_container_sync_point2, storage_policy_index, and
+ db_state.
"""
- self._commit_puts_stale_ok()
- with self.get() as conn:
- data = None
- trailing_sync = 'x_container_sync_point1, x_container_sync_point2'
- trailing_pol = 'storage_policy_index'
- errors = set()
- while not data:
- try:
- data = conn.execute(('''
- SELECT account, container, created_at, put_timestamp,
- delete_timestamp, status_changed_at,
- object_count, bytes_used,
- reported_put_timestamp, reported_delete_timestamp,
- reported_object_count, reported_bytes_used, hash,
- id, %s, %s
- FROM container_stat
- ''') % (trailing_sync, trailing_pol)).fetchone()
- except sqlite3.OperationalError as err:
- err_msg = str(err)
- if err_msg in errors:
- # only attempt migration once
- raise
- errors.add(err_msg)
- if 'no such column: storage_policy_index' in err_msg:
- trailing_pol = '0 AS storage_policy_index'
- elif 'no such column: x_container_sync_point' in err_msg:
- trailing_sync = '-1 AS x_container_sync_point1, ' \
- '-1 AS x_container_sync_point2'
- else:
- raise
- data = dict(data)
- # populate instance cache
- self._storage_policy_index = data['storage_policy_index']
- self.account = data['account']
- self.container = data['container']
- return data
+ data = self._get_info()
+ state, stats = self._get_alternate_object_stats()
+ data.update(stats)
+ data['db_state'] = state
+ return data
def set_x_container_sync_points(self, sync_point1, sync_point2):
with self.get() as conn:
@@ -657,7 +1048,9 @@ class ContainerBroker(DatabaseBroker):
conn.commit()
def list_objects_iter(self, limit, marker, end_marker, prefix, delimiter,
- path=None, storage_policy_index=0, reverse=False):
+ path=None, storage_policy_index=0, reverse=False,
+ include_deleted=False, since_row=None,
+ transform_func=None, all_policies=False):
"""
Get a list of objects sorted by name starting at marker onward, up
to limit entries. Entries will begin with the prefix and will not
@@ -672,10 +1065,29 @@ class ContainerBroker(DatabaseBroker):
the path
:param storage_policy_index: storage policy index for query
:param reverse: reverse the result order.
-
+ :param include_deleted: if True, include only deleted objects; if
+ False (default), include only undeleted objects; otherwise, include
+ both deleted and undeleted objects.
+ :param since_row: include only items whose ROWID is greater than
+ the given row id; by default all rows are included.
+ :param transform_func: an optional function that if given will be
+ called for each object to get a transformed version of the object
+ to include in the listing; should have same signature as
+ :meth:`~_transform_record`; defaults to :meth:`~_transform_record`.
+ :param all_policies: if True, include objects for all storage policies
+ ignoring any value given for ``storage_policy_index``
:returns: list of tuples of (name, created_at, size, content_type,
- etag)
+ etag, deleted)
"""
+ if include_deleted is True:
+ deleted_arg = ' = 1'
+ elif include_deleted is False:
+ deleted_arg = ' = 0'
+ else:
+ deleted_arg = ' in (0, 1)'
+
+ if transform_func is None:
+ transform_func = self._transform_record
delim_force_gte = False
(marker, end_marker, prefix, delimiter, path) = utf8encode(
marker, end_marker, prefix, delimiter, path)
@@ -695,60 +1107,71 @@ class ContainerBroker(DatabaseBroker):
orig_marker = marker
with self.get() as conn:
results = []
+ deleted_key = self._get_deleted_key(conn)
+ query_keys = ['name', 'created_at', 'size', 'content_type',
+ 'etag', deleted_key]
while len(results) < limit:
- query = '''SELECT name, created_at, size, content_type, etag
- FROM object WHERE'''
query_args = []
+ query_conditions = []
if end_marker and (not prefix or end_marker < end_prefix):
- query += ' name < ? AND'
+ query_conditions.append('name < ?')
query_args.append(end_marker)
elif prefix:
- query += ' name < ? AND'
+ query_conditions.append('name < ?')
query_args.append(end_prefix)
if delim_force_gte:
- query += ' name >= ? AND'
+ query_conditions.append('name >= ?')
query_args.append(marker)
# Always set back to False
delim_force_gte = False
elif marker and marker >= prefix:
- query += ' name > ? AND'
+ query_conditions.append('name > ?')
query_args.append(marker)
elif prefix:
- query += ' name >= ? AND'
+ query_conditions.append('name >= ?')
query_args.append(prefix)
- if self.get_db_version(conn) < 1:
- query += ' +deleted = 0'
- else:
- query += ' deleted = 0'
- orig_tail_query = '''
- ORDER BY name %s LIMIT ?
- ''' % ('DESC' if reverse else '')
- orig_tail_args = [limit - len(results)]
+ query_conditions.append(deleted_key + deleted_arg)
+ if since_row:
+ query_conditions.append('ROWID > ?')
+ query_args.append(since_row)
+
+ def build_query(keys, conditions, args):
+ query = 'SELECT ' + ', '.join(keys) + ' FROM object '
+ if conditions:
+ query += 'WHERE ' + ' AND '.join(conditions)
+ tail_query = '''
+ ORDER BY name %s LIMIT ?
+ ''' % ('DESC' if reverse else '')
+ return query + tail_query, args + [limit - len(results)]
+
# storage policy filter
- policy_tail_query = '''
- AND storage_policy_index = ?
- ''' + orig_tail_query
- policy_tail_args = [storage_policy_index] + orig_tail_args
- tail_query, tail_args = \
- policy_tail_query, policy_tail_args
+ if all_policies:
+ query, args = build_query(
+ query_keys + ['storage_policy_index'],
+ query_conditions,
+ query_args)
+ else:
+ query, args = build_query(
+ query_keys + ['storage_policy_index'],
+ query_conditions + ['storage_policy_index = ?'],
+ query_args + [storage_policy_index])
try:
- curs = conn.execute(query + tail_query,
- tuple(query_args + tail_args))
+ curs = conn.execute(query, tuple(args))
except sqlite3.OperationalError as err:
if 'no such column: storage_policy_index' not in str(err):
raise
- tail_query, tail_args = \
- orig_tail_query, orig_tail_args
- curs = conn.execute(query + tail_query,
- tuple(query_args + tail_args))
+ query, args = build_query(
+ query_keys + ['0 as storage_policy_index'],
+ query_conditions, query_args)
+ curs = conn.execute(query, tuple(args))
curs.row_factory = None
# Delimiters without a prefix is ignored, further if there
# is no delimiter then we can simply return the result as
# prefixes are now handled in the SQL statement.
if prefix is None or not delimiter:
- return [self._transform_record(r) for r in curs]
+ return [transform_func(r) for r in curs]
# We have a delimiter and a prefix (possibly empty string) to
# handle
@@ -787,19 +1210,51 @@ class ContainerBroker(DatabaseBroker):
results.append([dir_name, '0', 0, None, ''])
curs.close()
break
- results.append(self._transform_record(row))
+ results.append(transform_func(row))
if not rowcount:
break
return results
+ def get_objects(self, limit=None, marker='', end_marker='',
+ include_deleted=None, since_row=None):
+ """
+ Returns a list of objects, including deleted objects, in all policies.
+ Each object in the list is described by a dict with keys {'name',
+ 'created_at', 'size', 'content_type', 'etag', 'deleted',
+ 'storage_policy_index'}.
+
+ :param limit: maximum number of entries to get
+ :param marker: if set, objects with names less than or equal to this
+ value will not be included in the list.
+ :param end_marker: if set, objects with names greater than or equal to
+ this value will not be included in the list.
+ :param include_deleted: if True, include only deleted objects; if
+ False, include only undeleted objects; otherwise (default), include
+ both deleted and undeleted objects.
+ :param since_row: include only items whose ROWID is greater than
+ the given row id; by default all rows are included.
+ :return: a list of dicts, each describing an object.
+ """
+
+ limit = CONTAINER_LISTING_LIMIT if limit is None else limit
+ return self.list_objects_iter(
+ limit, marker, end_marker, prefix=None, delimiter=None, path=None,
+ reverse=False, include_deleted=include_deleted,
+ transform_func=self._record_to_dict, since_row=since_row,
+ all_policies=True
+ )
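+
+ # A usage sketch, assuming ``last_row`` is a previously recorded sync point:
+ #
+ #   broker.get_objects(limit=100, since_row=last_row)
+ #   # -> up to 100 dicts for rows added after last_row, in all policies,
+ #   #    including rows marked deleted (include_deleted defaults to None)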
+
def _transform_record(self, record):
"""
- Decode the created_at timestamp into separate data, content-type and
- meta timestamps and replace the created_at timestamp with the
- metadata timestamp i.e. the last-modified time.
+ Returns a tuple of (name, last-modified time, size, content_type and
+ etag) for the given record.
+
+ The given record's created_at timestamp is decoded into separate data,
+ content-type and meta timestamps and the metadata timestamp is used as
+ the last-modified time value.
"""
t_data, t_ctype, t_meta = decode_timestamps(record[1])
- return (record[0], t_meta.internal) + record[2:]
+ return (record[0], t_meta.internal) + record[2:5]
def _record_to_dict(self, rec):
if rec:
@@ -822,7 +1277,7 @@ class ContainerBroker(DatabaseBroker):
if isinstance(item['name'], six.text_type):
item['name'] = item['name'].encode('utf-8')
- def _really_merge_items(conn):
+ def _really_really_merge_items(conn):
curs = conn.cursor()
if self.get_db_version(conn) >= 1:
query_mod = ' deleted IN (0, 1) AND '
@@ -885,6 +1340,9 @@ class ContainerBroker(DatabaseBroker):
''', (sync_point, source))
conn.commit()
+ def _really_merge_items(conn):
+ return tpool.execute(_really_really_merge_items, conn)
+
with self.get() as conn:
try:
return _really_merge_items(conn)
@@ -894,6 +1352,86 @@ class ContainerBroker(DatabaseBroker):
self._migrate_add_storage_policy(conn)
return _really_merge_items(conn)
+ def merge_shard_ranges(self, shard_ranges):
+ """
+ Merge shard ranges into the shard range table.
+
+ :param shard_ranges: a shard range or a list of shard ranges; each
+ shard range should be an instance of
+ :class:`~swift.common.utils.ShardRange` or a dict representation of
+ a shard range having ``SHARD_RANGE_KEYS``.
+ """
+ if not shard_ranges:
+ return
+ if not isinstance(shard_ranges, list):
+ shard_ranges = [shard_ranges]
+
+ item_list = []
+ for item in shard_ranges:
+ if isinstance(item, ShardRange):
+ item = dict(item)
+ for col in ('name', 'lower', 'upper'):
+ if isinstance(item[col], six.text_type):
+ item[col] = item[col].encode('utf-8')
+ item_list.append(item)
+
+ def _really_merge_items(conn):
+ curs = conn.cursor()
+ curs.execute('BEGIN IMMEDIATE')
+
+ # Get rows for items that already exist.
+ # We must chunk it up to avoid sqlite's limit of 999 args.
+ records = {}
+ for offset in range(0, len(item_list), SQLITE_ARG_LIMIT):
+ chunk = [record['name'] for record
+ in item_list[offset:offset + SQLITE_ARG_LIMIT]]
+ records.update(
+ (rec[0], rec) for rec in curs.execute(
+ 'SELECT %s FROM %s '
+ 'WHERE deleted IN (0, 1) AND name IN (%s)' %
+ (', '.join(SHARD_RANGE_KEYS), SHARD_RANGE_TABLE,
+ ','.join('?' * len(chunk))), chunk))
+
+ # Sort item_list into things that need adding and deleting
+ to_delete = {}
+ to_add = {}
+ for item in item_list:
+ item_ident = item['name']
+ existing = records.get(item_ident)
+ if existing:
+ existing = dict(zip(SHARD_RANGE_KEYS, existing))
+ if merge_shards(item, existing):
+ # exists with older timestamp
+ if item_ident in records:
+ to_delete[item_ident] = item
+ # duplicate entries in item_list
+ if (item_ident not in to_add or
+ merge_shards(item, to_add[item_ident])):
+ to_add[item_ident] = item
+
+ if to_delete:
+ curs.executemany(
+ 'DELETE FROM %s WHERE deleted in (0, 1) '
+ 'AND name = ?' % SHARD_RANGE_TABLE,
+ ((item_ident,) for item_ident in to_delete))
+ if to_add:
+ vals = ','.join('?' * len(SHARD_RANGE_KEYS))
+ curs.executemany(
+ 'INSERT INTO %s (%s) VALUES (%s)' %
+ (SHARD_RANGE_TABLE, ','.join(SHARD_RANGE_KEYS), vals),
+ tuple([item[k] for k in SHARD_RANGE_KEYS]
+ for item in to_add.values()))
+ conn.commit()
+
+ with self.get() as conn:
+ try:
+ return _really_merge_items(conn)
+ except sqlite3.OperationalError as err:
+ if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err):
+ raise
+ self.create_shard_range_table(conn)
+ return _really_merge_items(conn)
+
def get_reconciler_sync(self):
with self.get() as conn:
try:
@@ -1039,3 +1577,644 @@ class ContainerBroker(DatabaseBroker):
''' % (column_names, column_names) +
CONTAINER_STAT_VIEW_SCRIPT +
'COMMIT;')
+
+ def _reclaim(self, conn, age_timestamp, sync_timestamp):
+ super(ContainerBroker, self)._reclaim(conn, age_timestamp,
+ sync_timestamp)
+ # populate instance cache, but use existing conn to avoid deadlock
+ # when it has a pending update
+ self._populate_instance_cache(conn=conn)
+ try:
+ conn.execute('''
+ DELETE FROM %s WHERE deleted = 1 AND timestamp < ?
+ AND name != ?
+ ''' % SHARD_RANGE_TABLE, (sync_timestamp, self.path))
+ except sqlite3.OperationalError as err:
+ if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err):
+ raise
+
+ def _get_shard_range_rows(self, connection=None, include_deleted=False,
+ states=None, exclude_states=None,
+ include_own=False, exclude_others=False):
+ """
+ Returns a list of shard range rows.
+
+ To get all shard ranges use ``include_own=True``. To get only the
+ broker's own shard range use ``include_own=True`` and
+ ``exclude_others=True``.
+
+ :param connection: db connection
+ :param include_deleted: include rows marked as deleted
+ :param states: include only rows matching the given state(s); can be an
+ int or a list of ints.
+ :param exclude_states: exclude rows matching the given state(s); can be
+ an int or a list of ints; takes precedence over ``state``.
+ :param include_own: boolean that governs whether the row whose name
+ matches the broker's path is included in the returned list. If
+ True, that row is included, otherwise it is not included. Default
+ is False.
+ :param exclude_others: boolean that governs whether the rows whose
+ names do not match the broker's path are included in the returned
+ list. If True, those rows are not included, otherwise they are
+ included. Default is False.
+ :return: a list of tuples.
+ """
+
+ if exclude_others and not include_own:
+ return []
+
+ def prep_states(states):
+ state_set = set()
+ if isinstance(states, (list, tuple, set)):
+ state_set.update(states)
+ elif states is not None:
+ state_set.add(states)
+ return state_set
+
+ excluded_states = prep_states(exclude_states)
+ included_states = prep_states(states)
+ included_states -= excluded_states
+
+ def do_query(conn):
+ try:
+ condition = ''
+ conditions = []
+ params = []
+ if not include_deleted:
+ conditions.append('deleted=0')
+ if included_states:
+ conditions.append('state in (%s)' % ','.join(
+ '?' * len(included_states)))
+ params.extend(included_states)
+ if excluded_states:
+ conditions.append('state not in (%s)' % ','.join(
+ '?' * len(excluded_states)))
+ params.extend(excluded_states)
+ if not include_own:
+ conditions.append('name != ?')
+ params.append(self.path)
+ if exclude_others:
+ conditions.append('name = ?')
+ params.append(self.path)
+ if conditions:
+ condition = ' WHERE ' + ' AND '.join(conditions)
+ sql = '''
+ SELECT %s
+ FROM %s%s;
+ ''' % (', '.join(SHARD_RANGE_KEYS), SHARD_RANGE_TABLE,
+ condition)
+ data = conn.execute(sql, params)
+ data.row_factory = None
+ return [row for row in data]
+ except sqlite3.OperationalError as err:
+ if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err):
+ raise
+ return []
+
+ if connection:
+ return do_query(connection)
+ else:
+ with self.get() as conn:
+ return do_query(conn)
+
+ @classmethod
+ def resolve_shard_range_states(cls, states):
+ """
+ Given a list of values each of which may be the name of a state, the
+ number of a state, or an alias, return the set of state numbers
+ described by the list.
+
+ The following alias values are supported: 'listing' maps to all states
+ that are considered valid when listing objects; 'updating' maps to all
+ states that are considered valid for redirecting an object update.
+
+ :param states: a list of values each of which may be the name of a
+ state, the number of a state, or an alias
+ :return: a set of integer state numbers, or None if no states are given
+ :raises ValueError: if any value in the given list is neither a valid
+ state nor a valid alias
+ """
+ if states:
+ resolved_states = set()
+ for state in states:
+ if state == 'listing':
+ resolved_states.update(SHARD_LISTING_STATES)
+ elif state == 'updating':
+ resolved_states.update(SHARD_UPDATE_STATES)
+ else:
+ resolved_states.add(ShardRange.resolve_state(state)[0])
+ return resolved_states
+ return None
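+
+ # For example:
+ #
+ #   ContainerBroker.resolve_shard_range_states(['listing'])
+ #   # -> set(SHARD_LISTING_STATES)
+ #   ContainerBroker.resolve_shard_range_states(['active', 70])
+ #   # -> {ShardRange.ACTIVE, ShardRange.SHARDED}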
+
+ def get_shard_ranges(self, marker=None, end_marker=None, includes=None,
+ reverse=False, include_deleted=False, states=None,
+ exclude_states=None, include_own=False,
+ exclude_others=False, fill_gaps=False):
+ """
+ Returns a list of persisted shard ranges.
+
+ :param marker: restricts the returned list to shard ranges whose
+ namespace includes or is greater than the marker value.
+ :param end_marker: restricts the returned list to shard ranges whose
+ namespace includes or is less than the end_marker value.
+ :param includes: restricts the returned list to the shard range that
+ includes the given value; if ``includes`` is specified then
+ ``marker`` and ``end_marker`` are ignored.
+ :param reverse: reverse the result order.
+ :param include_deleted: include items that have the delete marker set
+ :param states: if specified, restricts the returned list to shard
+ ranges that have the given state(s); can be a list of ints or a
+ single int.
+ :param exclude_states: exclude rows matching the given state(s); can be
+ an int or a list of ints; takes precedence over ``states``.
+ :param include_own: boolean that governs whether the row whose name
+ matches the broker's path is included in the returned list. If
+ True, that row is included, otherwise it is not included. Default
+ is False.
+ :param exclude_others: boolean that governs whether the rows whose
+ names do not match the broker's path are included in the returned
+ list. If True, those rows are not included, otherwise they are
+ included. Default is False.
+ :param fill_gaps: if True, insert the broker's own shard range to fill
+ any gap at the tail of the other shard ranges.
+ :return: a list of instances of :class:`swift.common.utils.ShardRange`
+ """
+ def shard_range_filter(sr):
+ end = start = True
+ if end_marker:
+ end = end_marker > sr.lower
+ if marker:
+ start = marker < sr.upper
+ return start and end
+
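+ # in a reverse listing the caller supplies marker/end_marker in the
+ # reverse sense; swap them so shard_range_filter can be applied
+ # uniformly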
+ if reverse:
+ marker, end_marker = end_marker, marker
+ if marker and end_marker and marker >= end_marker:
+ return []
+
+ shard_ranges = [
+ ShardRange(*row)
+ for row in self._get_shard_range_rows(
+ include_deleted=include_deleted, states=states,
+ exclude_states=exclude_states, include_own=include_own,
+ exclude_others=exclude_others)]
+ # note if this ever changes to *not* sort by upper first then it breaks
+ # a key assumption for bisect, which is used by utils.find_shard_ranges
+ shard_ranges.sort(key=lambda sr: (sr.upper, sr.state, sr.lower))
+ if includes:
+ shard_range = find_shard_range(includes, shard_ranges)
+ return [shard_range] if shard_range else []
+
+ if reverse:
+ shard_ranges.reverse()
+ if marker or end_marker:
+ shard_ranges = list(filter(shard_range_filter, shard_ranges))
+
+ if fill_gaps:
+ if reverse:
+ if shard_ranges:
+ last_upper = shard_ranges[0].upper
+ else:
+ last_upper = marker or ShardRange.MIN
+ required_upper = end_marker or ShardRange.MAX
+ filler_index = 0
+ else:
+ if shard_ranges:
+ last_upper = shard_ranges[-1].upper
+ else:
+ last_upper = marker or ShardRange.MIN
+ required_upper = end_marker or ShardRange.MAX
+ filler_index = len(shard_ranges)
+ if required_upper > last_upper:
+ filler_sr = self.get_own_shard_range()
+ filler_sr.lower = last_upper
+ filler_sr.upper = required_upper
+ shard_ranges.insert(filler_index, filler_sr)
+
+ return shard_ranges
+
+ def _own_shard_range(self, no_default=False):
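+ # return the persisted row for this broker's own shard range if it
+ # exists, otherwise a default ACTIVE shard range spanning the entire
+ # namespace, or None if no_default is True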
+ shard_ranges = self.get_shard_ranges(include_own=True,
+ include_deleted=True,
+ exclude_others=True)
+ if shard_ranges:
+ own_shard_range = shard_ranges[0]
+ elif no_default:
+ return None
+ else:
+ own_shard_range = ShardRange(
+ self.path, Timestamp.now(), ShardRange.MIN, ShardRange.MAX,
+ state=ShardRange.ACTIVE)
+ return own_shard_range
+
+ def get_own_shard_range(self, no_default=False):
+ """
+ Returns a shard range representing this broker's own shard range. If no
+ such range has been persisted in the broker's shard ranges table then a
+ default shard range representing the entire namespace will be returned.
+
+ The returned shard range will be updated with the current object stats
+ for this broker and a meta timestamp set to the current time. For these
+ values to be persisted the caller must merge the shard range.
+
+ :param no_default: if True and the broker's own shard range is not
+ found in the shard ranges table then None is returned, otherwise a
+ default shard range is returned.
+ :return: an instance of :class:`~swift.common.utils.ShardRange`
+ """
+ own_shard_range = self._own_shard_range(no_default=no_default)
+ if own_shard_range:
+ info = self.get_info()
+ own_shard_range.update_meta(
+ info['object_count'], info['bytes_used'])
+ return own_shard_range
+
+ def is_own_shard_range(self, shard_range):
+ return shard_range.name == self.path
+
+ def enable_sharding(self, epoch):
+ """
+ Updates this broker's own shard range with the given epoch, sets its
+ state to SHARDING and persists it in the DB.
+
+ :param epoch: a :class:`~swift.common.utils.Timestamp`
+ :return: the broker's updated own shard range.
+ """
+ own_shard_range = self._own_shard_range()
+ own_shard_range.update_state(ShardRange.SHARDING, epoch)
+ own_shard_range.epoch = epoch
+ self.merge_shard_ranges(own_shard_range)
+ return own_shard_range
+
+ def get_shard_usage(self):
+ """
+ Get the aggregate object stats for all shard ranges in states ACTIVE,
+ SHARDING or SHRINKING.
+
+ :return: a dict with keys {bytes_used, object_count}
+ """
+ shard_ranges = self.get_shard_ranges(states=SHARD_STATS_STATES)
+ return {'bytes_used': sum(sr.bytes_used for sr in shard_ranges),
+ 'object_count': sum(sr.object_count for sr in shard_ranges)}
+
+ def get_all_shard_range_data(self):
+ """
+ Returns a list of all shard range data, including own shard range and
+ deleted shard ranges.
+
+ :return: A list of dict representations of a ShardRange.
+ """
+ shard_ranges = self.get_shard_ranges(include_deleted=True,
+ include_own=True)
+ return [dict(sr) for sr in shard_ranges]
+
+ def set_sharding_state(self):
+ """
+ Creates and initializes a fresh DB file in preparation for sharding a
+ retiring DB. The broker's own shard range must have an epoch timestamp
+ for this method to succeed.
+
+ :return: True if the fresh DB was successfully created, False
+ otherwise.
+ """
+ epoch = self.get_own_shard_range().epoch
+ if not epoch:
+ self.logger.warning("Container '%s' cannot be set to sharding "
+ "state: missing epoch", self.path)
+ return False
+ state = self.get_db_state()
+ if not state == UNSHARDED:
+ self.logger.warning("Container '%s' cannot be set to sharding "
+ "state while in %s state", self.path, state)
+ return False
+
+ info = self.get_info()
+ # The tmp_dir is cleaned up by the replicators after reclaim_age, so if
+ # we initially create the fresh DB there, we will already have cleanup
+ # covered if there is an error.
+ tmp_dir = os.path.join(self.get_device_path(), 'tmp')
+ if not os.path.exists(tmp_dir):
+ mkdirs(tmp_dir)
+ tmp_db_file = os.path.join(tmp_dir, "fresh%s.db" % str(uuid4()))
+ fresh_broker = ContainerBroker(tmp_db_file, self.timeout, self.logger,
+ self.account, self.container)
+ fresh_broker.initialize(info['put_timestamp'],
+ info['storage_policy_index'])
+ # copy relevant data from the retiring db to the fresh db
+ fresh_broker.update_metadata(self.metadata)
+ fresh_broker.merge_shard_ranges(self.get_all_shard_range_data())
+ # copy sync points so that any peer in sync with retiring db will
+ # appear to be in sync with the fresh db, although the peer shouldn't
+ # attempt to replicate objects to a db with shard ranges.
+ for incoming in (True, False):
+ syncs = self.get_syncs(incoming)
+ fresh_broker.merge_syncs(syncs, incoming)
+
+ max_row = self.get_max_row()
+ with fresh_broker.get() as fresh_broker_conn:
+ # Initialise the rowid to continue from where the retiring db ended
+ try:
+ sql = "INSERT into object " \
+ "(ROWID, name, created_at, size, content_type, etag) " \
+ "values (?, 'tmp_sharding', ?, 0, '', ?)"
+ fresh_broker_conn.execute(
+ sql, (max_row, Timestamp.now().internal,
+ MD5_OF_EMPTY_STRING))
+ fresh_broker_conn.execute(
+ 'DELETE FROM object WHERE ROWID = ?', (max_row,))
+ fresh_broker_conn.commit()
+ except sqlite3.OperationalError as err:
+ self.logger.error(
+ 'Failed to set the ROWID of the fresh database for %s: %s',
+ self.path, err)
+ return False
+
+ # Set the created_at in the container_stat table to match the
+ # retiring db so both brokers report the same creation time
+ try:
+ fresh_broker_conn.execute(
+ 'UPDATE container_stat SET created_at=?',
+ (info['created_at'],))
+ fresh_broker_conn.commit()
+ except sqlite3.OperationalError as err:
+ self.logger.error('Failed to set matching created_at time in '
+ 'the fresh database for %s: %s',
+ self.path, err)
+ return False
+
+ # Rename to the new database
+ fresh_db_filename = make_db_file_path(self._db_file, epoch)
+ renamer(tmp_db_file, fresh_db_filename)
+ self.reload_db_files()
+ return True
+
+ def set_sharded_state(self):
+ """
+ Unlinks the broker's retiring DB file.
+
+ :return: True if the retiring DB was successfully unlinked, False
+ otherwise.
+ """
+ state = self.get_db_state()
+ if not state == SHARDING:
+ self.logger.warning("Container %r cannot be set to sharded "
+ "state while in %s state",
+ self.path, state)
+ return False
+
+ self.reload_db_files()
+ if len(self.db_files) < 2:
+ self.logger.warning(
+ 'Refusing to delete db file for %r: no fresher db file found '
+ 'in %r.', self.path, self.db_files)
+ return False
+
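+ # db_files is sorted oldest first: the last entry is the fresh db
+ # created by set_sharding_state, the penultimate entry is the
+ # retiring db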
+ retiring_file = self.db_files[-2]
+ try:
+ os.unlink(retiring_file)
+ self.logger.debug('Unlinked retiring db %r', retiring_file)
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ self.logger.exception('Failed to unlink %r' % retiring_file)
+ return False
+
+ self.reload_db_files()
+ if len(self.db_files) >= 2:
+ self.logger.warning(
+ 'Still have multiple db files after unlinking %r: %r',
+ retiring_file, self.db_files)
+ return False
+
+ return True
+
+ def get_brokers(self):
+ """
+ Return a list of brokers for component dbs. The list has two entries
+ while the db state is sharding: the first entry is a broker for the
+ retiring db with ``skip_commits`` set to ``True``; the second entry is
+ a broker for the fresh db with ``skip_commits`` set to ``False``. For
+ any other db state the list has one entry.
+
+ :return: a list of :class:`~swift.container.backend.ContainerBroker`
+ """
+ if len(self.db_files) > 2:
+ self.logger.warning('Unexpected db files will be ignored: %s' %
+ self.db_files[:-2])
+ brokers = []
+ db_files = self.db_files[-2:]
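+ # only the two most recent db files are relevant; skip_commits is
+ # True for every broker except the one for the freshest db file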
+ while db_files:
+ db_file = db_files.pop(0)
+ sub_broker = ContainerBroker(
+ db_file, self.timeout, self.logger, self.account,
+ self.container, self.pending_timeout, self.stale_reads_ok,
+ force_db_file=True, skip_commits=bool(db_files))
+ brokers.append(sub_broker)
+ return brokers
+
+ def set_sharding_sysmeta(self, key, value):
+ """
+ Updates the broker's metadata stored under the given key
+ prefixed with a sharding-specific namespace.
+
+ :param key: metadata key in the sharding metadata namespace.
+ :param value: metadata value
+ """
+ self.update_metadata({'X-Container-Sysmeta-Shard-' + key:
+ (value, Timestamp.now().internal)})
+
+ def get_sharding_sysmeta(self, key=None):
+ """
+ Returns sharding specific info from the broker's metadata.
+
+ :param key: if given the value stored under ``key`` in the sharding
+ info will be returned.
+ :return: either a dict of sharding info or the value stored under
+ ``key`` in that dict.
+ """
+ prefix = 'X-Container-Sysmeta-Shard-'
+ metadata = self.metadata
+ info = dict((k[len(prefix):], v[0]) for
+ k, v in metadata.items() if k.startswith(prefix))
+ if key:
+ return info.get(key)
+ return info
+
+ def _load_root_info(self):
+ """
+ Load the root container name and account for the container represented
+ by this broker.
+
+ The root container path, if set, is stored in sysmeta under the key
+ ``X-Container-Sysmeta-Shard-Root``. If this sysmeta is not set then the
+ container is considered to be a root container and ``_root_account``
+ and ``_root_container`` are set equal to the broker ``account`` and
+ ``container`` attributes respectively.
+
+ """
+ path = self.get_sharding_sysmeta('Root')
+ if not path:
+ # Ensure account/container get populated
+ self._populate_instance_cache()
+ self._root_account = self.account
+ self._root_container = self.container
+ return
+
+ try:
+ self._root_account, self._root_container = split_path(
+ '/' + path, 2, 2)
+ except ValueError:
+ raise ValueError("Expected X-Container-Sysmeta-Shard-Root to be "
+ "of the form 'account/container', got %r" % path)
+
+ @property
+ def root_account(self):
+ if not self._root_account:
+ self._load_root_info()
+ return self._root_account
+
+ @property
+ def root_container(self):
+ if not self._root_container:
+ self._load_root_info()
+ return self._root_container
+
+ @property
+ def root_path(self):
+ return '%s/%s' % (self.root_account, self.root_container)
+
+ def is_root_container(self):
+ """
+ Returns True if this container is a root container, False otherwise.
+
+ A root container is a container that is not a shard of another
+ container.
+ """
+ self._populate_instance_cache()
+ return (self.root_account == self.account and
+ self.root_container == self.container)
+
+ def _get_next_shard_range_upper(self, shard_size, last_upper=None):
+ """
+ Returns the name of the object that is ``shard_size`` rows beyond
+ ``last_upper`` in the object table, ordered by name. If ``last_upper``
+ is not given then the scan starts at the beginning of the object table,
+ ordered by name.
+
+ :param shard_size: the number of object rows in each shard range.
+ :param last_upper: the upper bound of the last found shard range.
+ :return: an object name, or None if the number of rows beyond
+ ``last_upper`` is less than ``shard_size``.
+ """
+ self._commit_puts_stale_ok()
+ with self.get() as connection:
+ sql = ('SELECT name FROM object WHERE %s=0 ' %
+ self._get_deleted_key(connection))
+ args = []
+ if last_upper:
+ sql += "AND name > ? "
+ args.append(str(last_upper))
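+ # the name of the (shard_size)th undeleted row beyond last_upper is
+ # the upper bound of the next shard range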
+ sql += "ORDER BY name LIMIT 1 OFFSET %d" % (shard_size - 1)
+ row = connection.execute(sql, args).fetchone()
+ return row['name'] if row else None
+
+ def find_shard_ranges(self, shard_size, limit=-1, existing_ranges=None):
+ """
+ Scans the container db for shard ranges. Scanning will start at the
+ upper bound of any ``existing_ranges`` that are given, otherwise
+ at ``ShardRange.MIN``. Scanning will stop when ``limit`` shard ranges
+ have been found or when no more shard ranges can be found. In the
+ latter case, the upper bound of the final shard range will be equal to
+ the upper bound of the container namespace.
+
+ This method does not modify the state of the db; callers are
+ responsible for persisting any shard range data in the db.
+
+ :param shard_size: the size of each shard range
+ :param limit: the maximum number of shard ranges to be found; a
+ negative value (default) implies no limit.
+ :param existing_ranges: an optional list of existing ShardRanges; if
+ given, this list should be sorted in order of upper bounds; the
+ scan for new shard ranges will start at the upper bound of the last
+ existing ShardRange.
+ :return: a tuple; the first value in the tuple is a list of
+ dicts each having keys {'index', 'lower', 'upper', 'object_count'}
+ in order of ascending 'upper'; the second value in the tuple is a
+ boolean which is True if the last shard range has been found, False
+ otherwise.
+ """
+ existing_ranges = existing_ranges or []
+ object_count = self.get_info().get('object_count', 0)
+ if shard_size >= object_count:
+ # container not big enough to shard
+ return [], False
+
+ own_shard_range = self.get_own_shard_range()
+ progress = 0
+ progress_reliable = True
+ # update initial state to account for any existing shard ranges
+ if existing_ranges:
+ if all([sr.state == ShardRange.FOUND
+ for sr in existing_ranges]):
+ progress = sum([sr.object_count for sr in existing_ranges])
+ else:
+ # object count in existing shard ranges may have changed
+ # since they were found so progress cannot be reliably
+ # calculated; use default progress of zero - that's ok,
+ # progress is used for optimisation not correctness
+ progress_reliable = False
+ last_shard_upper = existing_ranges[-1].upper
+ if last_shard_upper >= own_shard_range.upper:
+ # == implies all ranges were previously found
+ # > implies an acceptor range has been set into which this
+ # shard should cleave itself
+ return [], True
+ else:
+ last_shard_upper = own_shard_range.lower
+
+ found_ranges = []
+ sub_broker = self.get_brokers()[0]
+ index = len(existing_ranges)
+ while limit < 0 or len(found_ranges) < limit:
+ if progress + shard_size >= object_count:
+ # next shard point is at or beyond final object name so don't
+ # bother with db query
+ next_shard_upper = None
+ else:
+ try:
+ next_shard_upper = sub_broker._get_next_shard_range_upper(
+ shard_size, last_shard_upper)
+ except (sqlite3.OperationalError, LockTimeout):
+ self.logger.exception(
+ "Problem finding shard upper in %r: " % self.db_file)
+ break
+
+ if (next_shard_upper is None or
+ next_shard_upper > own_shard_range.upper):
+ # We reached the end of the container namespace, or possibly
+ # beyond if the container has misplaced objects. In either case
+ # limit the final shard range to own_shard_range.upper.
+ next_shard_upper = own_shard_range.upper
+ if progress_reliable:
+ # object count may include misplaced objects so the final
+ # shard size may not be accurate until cleaved, but at
+ # least the sum of shard sizes will equal the unsharded
+ # object_count
+ shard_size = object_count - progress
+
+ # NB shard ranges are created with a non-zero object count so that
+ # the apparent container object count remains constant, and the
+ # container is non-deletable while shards have been found but not
+ # yet cleaved
+ found_ranges.append(
+ {'index': index,
+ 'lower': str(last_shard_upper),
+ 'upper': str(next_shard_upper),
+ 'object_count': shard_size})
+
+ if next_shard_upper == own_shard_range.upper:
+ return found_ranges, True
+
+ progress += shard_size
+ last_shard_upper = next_shard_upper
+ index += 1
+
+ return found_ranges, False
diff --git a/swift/container/replicator.py b/swift/container/replicator.py
index 41c048716d..b326ab70e3 100644
--- a/swift/container/replicator.py
+++ b/swift/container/replicator.py
@@ -26,11 +26,10 @@ from swift.container.reconciler import (
get_reconciler_container_name, get_row_to_q_entry_translator)
from swift.common import db_replicator
from swift.common.storage_policy import POLICIES
+from swift.common.swob import HTTPOk, HTTPAccepted
from swift.common.exceptions import DeviceUnavailable
from swift.common.http import is_success
-from swift.common.db import DatabaseAlreadyExists
-from swift.common.utils import (Timestamp, hash_path,
- storage_directory, majority_size)
+from swift.common.utils import Timestamp, majority_size, get_db_files
class ContainerReplicator(db_replicator.Replicator):
@@ -39,6 +38,10 @@ class ContainerReplicator(db_replicator.Replicator):
datadir = DATADIR
default_port = 6201
+ def __init__(self, conf, logger=None):
+ super(ContainerReplicator, self).__init__(conf, logger=logger)
+ self.reconciler_cleanups = self.sync_store = None
+
def report_up_to_date(self, full_info):
reported_key_map = {
'reported_put_timestamp': 'put_timestamp',
@@ -61,8 +64,7 @@ class ContainerReplicator(db_replicator.Replicator):
return sync_args
def _handle_sync_response(self, node, response, info, broker, http,
- different_region):
- parent = super(ContainerReplicator, self)
+ different_region=False):
if is_success(response.status):
remote_info = json.loads(response.data)
if incorrect_policy_index(info, remote_info):
@@ -75,9 +77,50 @@ class ContainerReplicator(db_replicator.Replicator):
if any(info[key] != remote_info[key] for key in sync_timestamps):
broker.merge_timestamps(*(remote_info[key] for key in
sync_timestamps))
- rv = parent._handle_sync_response(
+
+ # Grab remote's shard ranges, too
+ self._fetch_and_merge_shard_ranges(http, broker)
+
+ return super(ContainerReplicator, self)._handle_sync_response(
node, response, info, broker, http, different_region)
- return rv
+
+ def _sync_shard_ranges(self, broker, http, local_id):
+ # TODO: currently the number of shard ranges is expected to be _much_
+ # less than normal objects so all are sync'd on each cycle. However, in
+ # future there should be sync points maintained much like for object
+ # syncing so that only new shard range rows are sync'd.
+ shard_range_data = broker.get_all_shard_range_data()
+ if shard_range_data:
+ if not self._send_replicate_request(
+ http, 'merge_shard_ranges', shard_range_data, local_id):
+ return False
+ self.logger.debug('%s synced %s shard ranges to %s',
+ broker.db_file, len(shard_range_data),
+ '%(ip)s:%(port)s/%(device)s' % http.node)
+ return True
+
+ def _choose_replication_mode(self, node, rinfo, info, local_sync, broker,
+ http, different_region):
+ # Always replicate shard ranges
+ shard_range_success = self._sync_shard_ranges(broker, http, info['id'])
+ if broker.sharding_initiated():
+ self.logger.warning(
+ '%s is able to shard -- refusing to replicate objects to peer '
+ '%s; have shard ranges and will wait for cleaving',
+ broker.db_file,
+ '%(ip)s:%(port)s/%(device)s' % node)
+ self.stats['deferred'] += 1
+ return shard_range_success
+
+ success = super(ContainerReplicator, self)._choose_replication_mode(
+ node, rinfo, info, local_sync, broker, http,
+ different_region)
+ return shard_range_success and success
+
+ def _fetch_and_merge_shard_ranges(self, http, broker):
+ response = http.replicate('get_shard_ranges')
+ if is_success(response.status):
+ broker.merge_shard_ranges(json.loads(response.data))
def find_local_handoff_for_part(self, part):
"""
@@ -114,15 +157,10 @@ class ContainerReplicator(db_replicator.Replicator):
raise DeviceUnavailable(
'No mounted devices found suitable to Handoff reconciler '
'container %s in partition %s' % (container, part))
- hsh = hash_path(account, container)
- db_dir = storage_directory(DATADIR, part, hsh)
- db_path = os.path.join(self.root, node['device'], db_dir, hsh + '.db')
- broker = ContainerBroker(db_path, account=account, container=container)
- if not os.path.exists(broker.db_file):
- try:
- broker.initialize(timestamp, 0)
- except DatabaseAlreadyExists:
- pass
+ broker = ContainerBroker.create_broker(
+ os.path.join(self.root, node['device']), part, account, container,
+ logger=self.logger, put_timestamp=timestamp,
+ storage_policy_index=0)
if self.reconciler_containers is not None:
self.reconciler_containers[container] = part, broker, node['id']
return broker
@@ -207,6 +245,18 @@ class ContainerReplicator(db_replicator.Replicator):
# replication
broker.update_reconciler_sync(max_sync)
+ def cleanup_post_replicate(self, broker, orig_info, responses):
+ debug_template = 'Not deleting db %s (%%s)' % broker.db_file
+ if broker.sharding_required():
+ # despite being a handoff, since we're sharding we're not going to
+ # do any cleanup so we can continue cleaving - this is still
+ # considered "success"
+ reason = 'requires sharding, state %s' % broker.get_db_state()
+ self.logger.debug(debug_template, reason)
+ return True
+ return super(ContainerReplicator, self).cleanup_post_replicate(
+ broker, orig_info, responses)
+
def delete_db(self, broker):
"""
Ensure that reconciler databases are only cleaned up at the end of the
@@ -217,12 +267,13 @@ class ContainerReplicator(db_replicator.Replicator):
# this container shouldn't be here, make sure it's cleaned up
self.reconciler_cleanups[broker.container] = broker
return
- try:
- # DB is going to get deleted. Be preemptive about it
- self.sync_store.remove_synced_container(broker)
- except Exception:
- self.logger.exception('Failed to remove sync_store entry %s' %
- broker.db_file)
+ if self.sync_store:
+ try:
+ # DB is going to get deleted. Be preemptive about it
+ self.sync_store.remove_synced_container(broker)
+ except Exception:
+ self.logger.exception('Failed to remove sync_store entry %s' %
+ broker.db_file)
return super(ContainerReplicator, self).delete_db(broker)
@@ -259,9 +310,20 @@ class ContainerReplicator(db_replicator.Replicator):
self.replicate_reconcilers()
return rv
+ def _in_sync(self, rinfo, info, broker, local_sync):
+ # TODO: don't always sync shard ranges!
+ if broker.get_shard_ranges(include_own=True, include_deleted=True):
+ return False
+
+ return super(ContainerReplicator, self)._in_sync(
+ rinfo, info, broker, local_sync)
+
class ContainerReplicatorRpc(db_replicator.ReplicatorRpc):
+ def _db_file_exists(self, db_path):
+ return bool(get_db_files(db_path))
+
def _parse_sync_args(self, args):
parent = super(ContainerReplicatorRpc, self)
remote_info = parent._parse_sync_args(args)
@@ -289,3 +351,27 @@ class ContainerReplicatorRpc(db_replicator.ReplicatorRpc):
timestamp=status_changed_at)
info = broker.get_replication_info()
return info
+
+ def _abort_rsync_then_merge(self, db_file, old_filename):
+ if super(ContainerReplicatorRpc, self)._abort_rsync_then_merge(
+ db_file, old_filename):
+ return True
+ # if the local db has started sharding since the original 'sync'
+ # request then abort object replication now; instantiate a fresh broker
+ # each time this check is performed so as to get the latest state
+ broker = ContainerBroker(db_file)
+ return broker.sharding_initiated()
+
+ def _post_rsync_then_merge_hook(self, existing_broker, new_broker):
+ # Note the following hook will need to change to using a pointer and
+ # limit in the future.
+ new_broker.merge_shard_ranges(
+ existing_broker.get_all_shard_range_data())
+
+ def merge_shard_ranges(self, broker, args):
+ broker.merge_shard_ranges(args[0])
+ return HTTPAccepted()
+
+ def get_shard_ranges(self, broker, args):
+ return HTTPOk(headers={'Content-Type': 'application/json'},
+ body=json.dumps(broker.get_all_shard_range_data()))
diff --git a/swift/container/server.py b/swift/container/server.py
index c7df07ac8e..48a8d2c2e9 100644
--- a/swift/container/server.py
+++ b/swift/container/server.py
@@ -24,7 +24,8 @@ from eventlet import Timeout
import swift.common.db
from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, \
+ RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, SHARD_UPDATE_STATES
from swift.container.replicator import ContainerReplicatorRpc
from swift.common.db import DatabaseAlreadyExists
from swift.common.container_sync_realms import ContainerSyncRealms
@@ -33,7 +34,8 @@ from swift.common.request_helpers import get_param, \
from swift.common.utils import get_logger, hash_path, public, \
Timestamp, storage_directory, validate_sync_to, \
config_true_value, timing_stats, replication, \
- override_bytes_from_content_type, get_log_line
+ override_bytes_from_content_type, get_log_line, ShardRange, list_from_csv
+
from swift.common.constraints import valid_timestamp, check_utf8, check_drive
from swift.common import constraints
from swift.common.bufferedhttp import http_connect
@@ -46,7 +48,7 @@ from swift.common.header_key_dict import HeaderKeyDict
from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPConflict, \
HTTPCreated, HTTPInternalServerError, HTTPNoContent, HTTPNotFound, \
HTTPPreconditionFailed, HTTPMethodNotAllowed, Request, Response, \
- HTTPInsufficientStorage, HTTPException
+ HTTPInsufficientStorage, HTTPException, HTTPMovedPermanently
def gen_resp_headers(info, is_deleted=False):
@@ -72,6 +74,7 @@ def gen_resp_headers(info, is_deleted=False):
'X-Timestamp': Timestamp(info.get('created_at', 0)).normal,
'X-PUT-Timestamp': Timestamp(
info.get('put_timestamp', 0)).normal,
+ 'X-Backend-Sharding-State': info.get('db_state', UNSHARDED),
})
return headers
@@ -261,6 +264,40 @@ class ContainerController(BaseStorageServer):
self.logger.exception('Failed to update sync_store %s during %s' %
(broker.db_file, method))
+ def _redirect_to_shard(self, req, broker, obj_name):
+ """
+ If the request indicates that it can accept a redirection, look for a
+ shard range that contains ``obj_name`` and if one exists return a
+ HTTPMovedPermanently response.
+
+ :param req: an instance of :class:`~swift.common.swob.Request`
+ :param broker: a container broker
+ :param obj_name: an object name
+ :return: an instance of :class:`swift.common.swob.HTTPMovedPermanently`
+ if a shard range exists for the given ``obj_name``, otherwise None.
+ """
+ if not config_true_value(
+ req.headers.get('x-backend-accept-redirect', False)):
+ return None
+
+ shard_ranges = broker.get_shard_ranges(
+ includes=obj_name, states=SHARD_UPDATE_STATES)
+ if not shard_ranges:
+ return None
+
+ # note: obj_name may be included in both a created sub-shard and its
+ # sharding parent. get_shard_ranges will return the created sub-shard
+ # in preference to the parent, which is the desired result.
+ containing_range = shard_ranges[0]
+ location = "/%s/%s" % (containing_range.name, obj_name)
+ headers = {'Location': location,
+ 'X-Backend-Redirect-Timestamp':
+ containing_range.timestamp.internal}
+
+ # we do not want the host added to the location
+ req.environ['swift.leave_relative_location'] = True
+ return HTTPMovedPermanently(headers=headers, request=req)
+
@public
@timing_stats()
def DELETE(self, req):
@@ -283,6 +320,11 @@ class ContainerController(BaseStorageServer):
if not os.path.exists(broker.db_file):
return HTTPNotFound()
if obj: # delete object
+ # redirect if a shard range exists for the object name
+ redirect = self._redirect_to_shard(req, broker, obj)
+ if redirect:
+ return redirect
+
broker.delete_object(obj, req.headers.get('x-timestamp'),
obj_policy_index)
return HTTPNoContent(request=req)
@@ -343,6 +385,40 @@ class ContainerController(BaseStorageServer):
broker.update_status_changed_at(timestamp)
return recreated
+ def _maybe_autocreate(self, broker, req_timestamp, account,
+ policy_index):
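+ # for auto-create accounts, initialize the container db on demand;
+ # returns True if a fresh db was created, raises HTTPNotFound if the
+ # db still does not exist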
+ created = False
+ if account.startswith(self.auto_create_account_prefix) and \
+ not os.path.exists(broker.db_file):
+ if policy_index is None:
+ raise HTTPBadRequest(
+ 'X-Backend-Storage-Policy-Index header is required')
+ try:
+ broker.initialize(req_timestamp.internal, policy_index)
+ except DatabaseAlreadyExists:
+ pass
+ else:
+ created = True
+ if not os.path.exists(broker.db_file):
+ raise HTTPNotFound()
+ return created
+
+ def _update_metadata(self, req, broker, req_timestamp, method):
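+ # apply metadata updates common to PUT and POST, resetting container
+ # sync points if the sync-to target has changed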
+ metadata = {}
+ metadata.update(
+ (key, (value, req_timestamp.internal))
+ for key, value in req.headers.items()
+ if key.lower() in self.save_headers or
+ is_sys_or_user_meta('container', key))
+ if metadata:
+ if 'X-Container-Sync-To' in metadata:
+ if 'X-Container-Sync-To' not in broker.metadata or \
+ metadata['X-Container-Sync-To'][0] != \
+ broker.metadata['X-Container-Sync-To'][0]:
+ broker.set_x_container_sync_points(-1, -1)
+ broker.update_metadata(metadata, validate_metadata=True)
+ self._update_sync_store(broker, method)
+
@public
@timing_stats()
def PUT(self, req):
@@ -364,14 +440,13 @@ class ContainerController(BaseStorageServer):
# obj put expects the policy_index header, default is for
# legacy support during upgrade.
obj_policy_index = requested_policy_index or 0
- if account.startswith(self.auto_create_account_prefix) and \
- not os.path.exists(broker.db_file):
- try:
- broker.initialize(req_timestamp.internal, obj_policy_index)
- except DatabaseAlreadyExists:
- pass
- if not os.path.exists(broker.db_file):
- return HTTPNotFound()
+ self._maybe_autocreate(broker, req_timestamp, account,
+ obj_policy_index)
+ # redirect if a shard exists for this object name
+ response = self._redirect_to_shard(req, broker, obj)
+ if response:
+ return response
+
broker.put_object(obj, req_timestamp.internal,
int(req.headers['x-size']),
req.headers['x-content-type'],
@@ -380,6 +455,22 @@ class ContainerController(BaseStorageServer):
req.headers.get('x-content-type-timestamp'),
req.headers.get('x-meta-timestamp'))
return HTTPCreated(request=req)
+
+ record_type = req.headers.get('x-backend-record-type', '').lower()
+ if record_type == RECORD_TYPE_SHARD:
+ try:
+ # validate incoming data...
+ shard_ranges = [ShardRange.from_dict(sr)
+ for sr in json.loads(req.body)]
+ except (ValueError, KeyError, TypeError) as err:
+ return HTTPBadRequest('Invalid body: %r' % err)
+ created = self._maybe_autocreate(broker, req_timestamp, account,
+ requested_policy_index)
+ self._update_metadata(req, broker, req_timestamp, 'PUT')
+ if shard_ranges:
+ # TODO: consider writing the shard ranges into the pending
+ # file, but if so ensure an all-or-none semantic for the write
+ broker.merge_shard_ranges(shard_ranges)
else: # put container
if requested_policy_index is None:
# use the default index sent by the proxy if available
@@ -391,31 +482,18 @@ class ContainerController(BaseStorageServer):
req_timestamp.internal,
new_container_policy,
requested_policy_index)
- metadata = {}
- metadata.update(
- (key, (value, req_timestamp.internal))
- for key, value in req.headers.items()
- if key.lower() in self.save_headers or
- is_sys_or_user_meta('container', key))
- if 'X-Container-Sync-To' in metadata:
- if 'X-Container-Sync-To' not in broker.metadata or \
- metadata['X-Container-Sync-To'][0] != \
- broker.metadata['X-Container-Sync-To'][0]:
- broker.set_x_container_sync_points(-1, -1)
- broker.update_metadata(metadata, validate_metadata=True)
- if metadata:
- self._update_sync_store(broker, 'PUT')
+ self._update_metadata(req, broker, req_timestamp, 'PUT')
resp = self.account_update(req, account, container, broker)
if resp:
return resp
- if created:
- return HTTPCreated(request=req,
- headers={'x-backend-storage-policy-index':
- broker.storage_policy_index})
- else:
- return HTTPAccepted(request=req,
- headers={'x-backend-storage-policy-index':
- broker.storage_policy_index})
+ if created:
+ return HTTPCreated(request=req,
+ headers={'x-backend-storage-policy-index':
+ broker.storage_policy_index})
+ else:
+ return HTTPAccepted(request=req,
+ headers={'x-backend-storage-policy-index':
+ broker.storage_policy_index})
@public
@timing_stats(sample_rate=0.1)
@@ -454,13 +532,18 @@ class ContainerController(BaseStorageServer):
:params record: object entry record
:returns: modified record
"""
- (name, created, size, content_type, etag) = record[:5]
- if content_type is None:
- return {'subdir': name.decode('utf8')}
- response = {'bytes': size, 'hash': etag, 'name': name.decode('utf8'),
- 'content_type': content_type}
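+ # shard range records are listed as their dict representation;
+ # object records retain the existing listing format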
+ if isinstance(record, ShardRange):
+ created = record.timestamp
+ response = dict(record)
+ else:
+ (name, created, size, content_type, etag) = record[:5]
+ if content_type is None:
+ return {'subdir': name.decode('utf8')}
+ response = {
+ 'bytes': size, 'hash': etag, 'name': name.decode('utf8'),
+ 'content_type': content_type}
+ override_bytes_from_content_type(response, logger=self.logger)
response['last_modified'] = Timestamp(created).isoformat
- override_bytes_from_content_type(response, logger=self.logger)
return response
@public
@@ -494,12 +577,45 @@ class ContainerController(BaseStorageServer):
pending_timeout=0.1,
stale_reads_ok=True)
info, is_deleted = broker.get_info_is_deleted()
- resp_headers = gen_resp_headers(info, is_deleted=is_deleted)
- if is_deleted:
- return HTTPNotFound(request=req, headers=resp_headers)
- container_list = broker.list_objects_iter(
- limit, marker, end_marker, prefix, delimiter, path,
- storage_policy_index=info['storage_policy_index'], reverse=reverse)
+ record_type = req.headers.get('x-backend-record-type', '').lower()
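+ # a record type of 'auto' resolves to 'shard' once this db has started
+ # sharding; otherwise fall through to a normal object listing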
+ if record_type == 'auto' and info.get('db_state') in (SHARDING,
+ SHARDED):
+ record_type = 'shard'
+ if record_type == 'shard':
+ override_deleted = info and config_true_value(
+ req.headers.get('x-backend-override-deleted', False))
+ resp_headers = gen_resp_headers(
+ info, is_deleted=is_deleted and not override_deleted)
+ if is_deleted and not override_deleted:
+ return HTTPNotFound(request=req, headers=resp_headers)
+ resp_headers['X-Backend-Record-Type'] = 'shard'
+ includes = get_param(req, 'includes')
+ states = get_param(req, 'states')
+ fill_gaps = False
+ if states:
+ states = list_from_csv(states)
+ fill_gaps = any(('listing' in states, 'updating' in states))
+ try:
+ states = broker.resolve_shard_range_states(states)
+ except ValueError:
+ return HTTPBadRequest(request=req, body='Bad state')
+ include_deleted = config_true_value(
+ req.headers.get('x-backend-include-deleted', False))
+ container_list = broker.get_shard_ranges(
+ marker, end_marker, includes, reverse, states=states,
+ include_deleted=include_deleted, fill_gaps=fill_gaps)
+ else:
+ resp_headers = gen_resp_headers(info, is_deleted=is_deleted)
+ if is_deleted:
+ return HTTPNotFound(request=req, headers=resp_headers)
+ resp_headers['X-Backend-Record-Type'] = 'object'
+ # Use the retired db while container is in process of sharding,
+ # otherwise use current db
+ src_broker = broker.get_brokers()[0]
+ container_list = src_broker.list_objects_iter(
+ limit, marker, end_marker, prefix, delimiter, path,
+ storage_policy_index=info['storage_policy_index'],
+ reverse=reverse)
return self.create_listing(req, out_content_type, info, resp_headers,
broker.metadata, container_list, container)
@@ -562,20 +678,7 @@ class ContainerController(BaseStorageServer):
if broker.is_deleted():
return HTTPNotFound(request=req)
broker.update_put_timestamp(req_timestamp.internal)
- metadata = {}
- metadata.update(
- (key, (value, req_timestamp.internal))
- for key, value in req.headers.items()
- if key.lower() in self.save_headers or
- is_sys_or_user_meta('container', key))
- if metadata:
- if 'X-Container-Sync-To' in metadata:
- if 'X-Container-Sync-To' not in broker.metadata or \
- metadata['X-Container-Sync-To'][0] != \
- broker.metadata['X-Container-Sync-To'][0]:
- broker.set_x_container_sync_points(-1, -1)
- broker.update_metadata(metadata, validate_metadata=True)
- self._update_sync_store(broker, 'POST')
+ self._update_metadata(req, broker, req_timestamp, 'POST')
return HTTPNoContent(request=req)
def __call__(self, env, start_response):
diff --git a/swift/container/sharder.py b/swift/container/sharder.py
new file mode 100644
index 0000000000..06c2b6d9db
--- /dev/null
+++ b/swift/container/sharder.py
@@ -0,0 +1,1568 @@
+# Copyright (c) 2015 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import errno
+import json
+import time
+from collections import defaultdict
+from random import random
+
+import os
+import six
+from eventlet import Timeout
+
+from swift.common import internal_client, db_replicator
+from swift.common.constraints import check_drive
+from swift.common.direct_client import (direct_put_container,
+ DirectClientException)
+from swift.common.exceptions import DeviceUnavailable
+from swift.common.ring.utils import is_local_device
+from swift.common.utils import get_logger, config_true_value, \
+ dump_recon_cache, whataremyips, Timestamp, ShardRange, GreenAsyncPile, \
+ config_float_value, config_positive_int_value, \
+ quorum_size, parse_override_options, Everything, config_auto_int_value
+from swift.container.backend import ContainerBroker, \
+ RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \
+ SHARD_UPDATE_STATES
+from swift.container.replicator import ContainerReplicator
+
+
+def sharding_enabled(broker):
+ # NB all shards will by default have been created with
+ # X-Container-Sysmeta-Sharding set and will therefore be candidates for
+ # sharding, along with explicitly configured root containers.
+ sharding = broker.metadata.get('X-Container-Sysmeta-Sharding')
+ if sharding and config_true_value(sharding[0]):
+ return True
+ # if broker has been marked deleted it will have lost sysmeta, but we still
+ # need to process the broker (for example, to shrink any shard ranges) so
+ # fallback to checking if it has any shard ranges
+ if broker.get_shard_ranges():
+ return True
+ return False
+
+
+def make_shard_ranges(broker, shard_data, shards_account_prefix):
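+ # build ShardRange instances from scanner data (dicts with keys
+ # 'index', 'lower', 'upper' and 'object_count'), naming each shard
+ # container under the shards account prefix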
+ timestamp = Timestamp.now()
+ shard_ranges = []
+ for data in shard_data:
+ # Make a copy so we don't mutate the original
+ kwargs = data.copy()
+ path = ShardRange.make_path(
+ shards_account_prefix + broker.root_account,
+ broker.root_container, broker.container,
+ timestamp, kwargs.pop('index'))
+
+ shard_ranges.append(ShardRange(path, timestamp, **kwargs))
+ return shard_ranges
+
+
+def find_missing_ranges(shard_ranges):
+ """
+ Find any ranges in the entire object namespace that are not covered by any
+ shard range in the given list.
+
+ :param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`
+ :return: a list of (lower, upper) tuples describing any missing ranges
+ """
+ gaps = []
+ if not shard_ranges:
+ return [(ShardRange.MIN, ShardRange.MAX)]
+ if shard_ranges[0].lower > ShardRange.MIN:
+ gaps.append((ShardRange.MIN, shard_ranges[0].lower))
+ for first, second in zip(shard_ranges, shard_ranges[1:]):
+ if first.upper < second.lower:
+ gaps.append((first.upper, second.lower))
+ if shard_ranges[-1].upper < ShardRange.MAX:
+ gaps.append((shard_ranges[-1].upper, ShardRange.MAX))
+ return gaps
+
+
+def find_overlapping_ranges(shard_ranges):
+ """
+ Find all pairs of overlapping ranges in the given list.
+
+ :param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`
+ :return: a set of tuples, each tuple containing ranges that overlap with
+ each other.
+ """
+ result = set()
+ for shard_range in shard_ranges:
+ overlapping = [sr for sr in shard_ranges
+ if shard_range != sr and shard_range.overlaps(sr)]
+ if overlapping:
+ overlapping.append(shard_range)
+ overlapping.sort()
+ result.add(tuple(overlapping))
+
+ return result
+
+
+def is_sharding_candidate(shard_range, threshold):
+ return (shard_range.state == ShardRange.ACTIVE and
+ shard_range.object_count >= threshold)
+
+
+def find_sharding_candidates(broker, threshold, shard_ranges=None):
+ # this should only execute on root containers; the goal is to find
+ # large shard containers that should be sharded.
+ # First cut is simple: assume root container shard usage stats are good
+ # enough to make decision.
+ # TODO: object counts may well not be the appropriate metric for
+ # deciding to shrink because a shard with low object_count may have a
+ # large number of deleted object rows that will need to be merged with
+ # a neighbour. We may need to expose row count as well as object count.
+ if shard_ranges is None:
+ shard_ranges = broker.get_shard_ranges(states=[ShardRange.ACTIVE])
+ candidates = []
+ for shard_range in shard_ranges:
+ if not is_sharding_candidate(shard_range, threshold):
+ continue
+ shard_range.update_state(ShardRange.SHARDING,
+ state_timestamp=Timestamp.now())
+ shard_range.epoch = shard_range.state_timestamp
+ candidates.append(shard_range)
+ return candidates
+
+
+def find_shrinking_candidates(broker, shrink_threshold, merge_size):
+ # this should only execute on root containers that have sharded; the
+ # goal is to find small shard containers that could be retired by
+ # merging with a neighbour.
+ # First cut is simple: assume root container shard usage stats are good
+ # enough to make decision; only merge with upper neighbour so that
+ # upper bounds never change (shard names include upper bound).
+ # TODO: object counts may well not be the appropriate metric for
+ # deciding to shrink because a shard with low object_count may have a
+ # large number of deleted object rows that will need to be merged with
+ # a neighbour. We may need to expose row count as well as object count.
+ shard_ranges = broker.get_shard_ranges()
+ own_shard_range = broker.get_own_shard_range()
+ if len(shard_ranges) == 1:
+ # special case to enable final shard to shrink into root
+ shard_ranges.append(own_shard_range)
+
+ merge_pairs = {}
+ for donor, acceptor in zip(shard_ranges, shard_ranges[1:]):
+ if donor in merge_pairs:
+ # this range may already have been made an acceptor; if so then
+ # move on. In principle it might be that even after expansion
+ # this range and its donor(s) could all be merged with the next
+ # range. In practice it is much easier to reason about a single
+ # donor merging into a single acceptor. Don't fret - eventually
+ # all the small ranges will be retired.
+ continue
+ if (acceptor.name != own_shard_range.name and
+ acceptor.state != ShardRange.ACTIVE):
+ # don't shrink into a range that is not yet ACTIVE
+ continue
+ if donor.state not in (ShardRange.ACTIVE, ShardRange.SHRINKING):
+ # found? created? sharded? don't touch it
+ continue
+
+ proposed_object_count = donor.object_count + acceptor.object_count
+ if (donor.state == ShardRange.SHRINKING or
+ (donor.object_count < shrink_threshold and
+ proposed_object_count < merge_size)):
+ # include previously identified merge pairs on presumption that
+ # following shrink procedure is idempotent
+ merge_pairs[acceptor] = donor
+ if donor.update_state(ShardRange.SHRINKING):
+ # Set donor state to shrinking so that next cycle won't use
+ # it as an acceptor; state_timestamp defines new epoch for
+ # donor and new timestamp for the expanded acceptor below.
+ donor.epoch = donor.state_timestamp = Timestamp.now()
+ if acceptor.lower != donor.lower:
+ # Update the acceptor container with its expanding state to
+ # prevent it treating objects cleaved from the donor
+ # as misplaced.
+ acceptor.lower = donor.lower
+ acceptor.timestamp = donor.state_timestamp
+ return merge_pairs
+
+
+class CleavingContext(object):
+ def __init__(self, ref, cursor='', max_row=None, cleave_to_row=None,
+ last_cleave_to_row=None, cleaving_done=False,
+ misplaced_done=False, ranges_done=0, ranges_todo=0):
+ self.ref = ref
+ self._cursor = None
+ self.cursor = cursor
+ self.max_row = max_row
+ self.cleave_to_row = cleave_to_row
+ self.last_cleave_to_row = last_cleave_to_row
+ self.cleaving_done = cleaving_done
+ self.misplaced_done = misplaced_done
+ self.ranges_done = ranges_done
+ self.ranges_todo = ranges_todo
+
+ def __iter__(self):
+ yield 'ref', self.ref
+ yield 'cursor', self.cursor
+ yield 'max_row', self.max_row
+ yield 'cleave_to_row', self.cleave_to_row
+ yield 'last_cleave_to_row', self.last_cleave_to_row
+ yield 'cleaving_done', self.cleaving_done
+ yield 'misplaced_done', self.misplaced_done
+ yield 'ranges_done', self.ranges_done
+ yield 'ranges_todo', self.ranges_todo
+
+ def _encode(self, value):
+ if value is not None and six.PY2 and isinstance(value, six.text_type):
+ return value.encode('utf-8')
+ return value
+
+ @property
+ def cursor(self):
+ return self._cursor
+
+ @cursor.setter
+ def cursor(self, value):
+ self._cursor = self._encode(value)
+
+ @property
+ def marker(self):
+ return self.cursor + '\x00'
+
+ @classmethod
+ def _make_ref(cls, broker):
+ return broker.get_info()['id']
+
+ @classmethod
+ def load(cls, broker):
+ """
+ Returns a :class:`CleavingContext` for tracking the progress of cleaving this
+ broker's retiring DB. The context is persisted in sysmeta using a key
+ that is based off the retiring db id and max row. This form of
+ key ensures that a cleaving context is only loaded for a db that
+ matches the id and max row when the context was created; if a db is
+ modified such that its max row changes then a different context, or no
+ context, will be loaded.
+
+ :return: An instance of :class:`CleavingContext` initialised with any
+ persisted cleaving state; its ``ref`` attribute should not be
+ modified by any caller.
+ """
+ brokers = broker.get_brokers()
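+ # the ref is derived from the first (retiring, when sharding) db's id;
+ # the context itself is stored in the last (fresh) db's sharding
+ # sysmeta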
+ ref = cls._make_ref(brokers[0])
+ data = brokers[-1].get_sharding_sysmeta('Context-' + ref)
+ data = json.loads(data) if data else {}
+ data['ref'] = ref
+ data['max_row'] = brokers[0].get_max_row()
+ return cls(**data)
+
+ def store(self, broker):
+ broker.set_sharding_sysmeta('Context-' + self.ref,
+ json.dumps(dict(self)))
+
+ def reset(self):
+ self.cursor = ''
+ self.ranges_done = 0
+ self.ranges_todo = 0
+ self.cleaving_done = False
+ self.misplaced_done = False
+ self.last_cleave_to_row = self.cleave_to_row
+
+ def start(self):
+ self.cursor = ''
+ self.ranges_done = 0
+ self.ranges_todo = 0
+ self.cleaving_done = False
+ self.cleave_to_row = self.max_row
+
+ def done(self):
+ return all((self.misplaced_done, self.cleaving_done,
+ self.max_row == self.cleave_to_row))
+
+
+DEFAULT_SHARD_CONTAINER_THRESHOLD = 10000000
+DEFAULT_SHARD_SHRINK_POINT = 25
+DEFAULT_SHARD_MERGE_POINT = 75
+
+
+class ContainerSharder(ContainerReplicator):
+ """Shards containers."""
+
+ def __init__(self, conf, logger=None):
+ logger = logger or get_logger(conf, log_route='container-sharder')
+ super(ContainerSharder, self).__init__(conf, logger=logger)
+ self.shards_account_prefix = (
+ (conf.get('auto_create_account_prefix') or '.') + 'shards_')
+
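+ # convert a config value expressed as a percentage (0-100) into a
+ # fraction between 0.0 and 1.0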
+ def percent_value(key, default):
+ try:
+ value = conf.get(key, default)
+ return config_float_value(value, 0, 100) / 100.0
+ except ValueError as err:
+ raise ValueError("%s: %s" % (str(err), key))
+
+ self.shard_shrink_point = percent_value('shard_shrink_point',
+ DEFAULT_SHARD_SHRINK_POINT)
+ self.shrink_merge_point = percent_value('shard_shrink_merge_point',
+ DEFAULT_SHARD_MERGE_POINT)
+ self.shard_container_threshold = config_positive_int_value(
+ conf.get('shard_container_threshold',
+ DEFAULT_SHARD_CONTAINER_THRESHOLD))
+ self.shrink_size = (self.shard_container_threshold *
+ self.shard_shrink_point)
+ self.merge_size = (self.shard_container_threshold *
+ self.shrink_merge_point)
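+ # nominal target size for each shard range found by the scanner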
+ self.split_size = self.shard_container_threshold // 2
+ self.scanner_batch_size = config_positive_int_value(
+ conf.get('shard_scanner_batch_size', 10))
+ self.cleave_batch_size = config_positive_int_value(
+ conf.get('cleave_batch_size', 2))
+ self.cleave_row_batch_size = config_positive_int_value(
+ conf.get('cleave_row_batch_size', 10000))
+ self.auto_shard = config_true_value(conf.get('auto_shard', False))
+ self.sharding_candidates = []
+ self.recon_candidates_limit = int(
+ conf.get('recon_candidates_limit', 5))
+ self.broker_timeout = config_positive_int_value(
+ conf.get('broker_timeout', 60))
+ replica_count = self.ring.replica_count
+ quorum = quorum_size(replica_count)
+ self.shard_replication_quorum = config_auto_int_value(
+ conf.get('shard_replication_quorum'), quorum)
+ if self.shard_replication_quorum > replica_count:
+ self.logger.warning(
+ 'shard_replication_quorum of %s exceeds replica count %s'
+ ', reducing to %s', self.shard_replication_quorum,
+ replica_count, replica_count)
+ self.shard_replication_quorum = replica_count
+ self.existing_shard_replication_quorum = config_auto_int_value(
+ conf.get('existing_shard_replication_quorum'),
+ self.shard_replication_quorum)
+ if self.existing_shard_replication_quorum > replica_count:
+ self.logger.warning(
+ 'existing_shard_replication_quorum of %s exceeds replica count'
+ ' %s, reducing to %s', self.existing_shard_replication_quorum,
+ replica_count, replica_count)
+ self.existing_shard_replication_quorum = replica_count
+
+ # internal client
+ self.conn_timeout = float(conf.get('conn_timeout', 5))
+ request_tries = config_positive_int_value(
+ conf.get('request_tries', 3))
+ internal_client_conf_path = conf.get('internal_client_conf_path',
+ '/etc/swift/internal-client.conf')
+ try:
+ self.int_client = internal_client.InternalClient(
+ internal_client_conf_path,
+ 'Swift Container Sharder',
+ request_tries,
+ allow_modify_pipeline=False)
+ except IOError as err:
+ if err.errno != errno.ENOENT:
+ raise
+ raise SystemExit(
+ 'Unable to load internal client from config: %r (%s)' %
+ (internal_client_conf_path, err))
+ self.reported = 0
+
+ def _zero_stats(self):
+ """Zero out the stats."""
+ super(ContainerSharder, self)._zero_stats()
+ # all sharding stats that are additional to the inherited replicator
+ # stats are maintained under the 'sharding' key in self.stats
+ self.stats['sharding'] = defaultdict(lambda: defaultdict(int))
+ self.sharding_candidates = []
+
+ def _append_stat(self, category, key, value):
+ if not self.stats['sharding'][category][key]:
+ self.stats['sharding'][category][key] = list()
+ self.stats['sharding'][category][key].append(value)
+
+ def _min_stat(self, category, key, value):
+ current = self.stats['sharding'][category][key]
+ if not current:
+ self.stats['sharding'][category][key] = value
+ else:
+ self.stats['sharding'][category][key] = min(current, value)
+
+ def _max_stat(self, category, key, value):
+ current = self.stats['sharding'][category][key]
+ if not current:
+ self.stats['sharding'][category][key] = value
+ else:
+ self.stats['sharding'][category][key] = max(current, value)
+
+ def _increment_stat(self, category, key, step=1, statsd=False):
+ self.stats['sharding'][category][key] += step
+ if statsd:
+ statsd_key = '%s_%s' % (category, key)
+ self.logger.increment(statsd_key)
+
+ def _make_stats_info(self, broker, node, own_shard_range):
+ try:
+ file_size = os.stat(broker.db_file).st_size
+ except OSError:
+ file_size = None
+
+ return {'path': broker.db_file,
+ 'node_index': node.get('index'),
+ 'account': broker.account,
+ 'container': broker.container,
+ 'root': broker.root_path,
+ 'object_count': own_shard_range.object_count,
+ 'meta_timestamp': own_shard_range.meta_timestamp.internal,
+ 'file_size': file_size}
+
+ def _identify_sharding_candidate(self, broker, node):
+ own_shard_range = broker.get_own_shard_range()
+ if is_sharding_candidate(
+ own_shard_range, self.shard_container_threshold):
+ self.sharding_candidates.append(
+ self._make_stats_info(broker, node, own_shard_range))
+
+ def _transform_sharding_candidate_stats(self):
+ category = self.stats['sharding']['sharding_candidates']
+ candidates = self.sharding_candidates
+ category['found'] = len(candidates)
+ candidates.sort(key=lambda c: c['object_count'], reverse=True)
+ if self.recon_candidates_limit >= 0:
+ category['top'] = candidates[:self.recon_candidates_limit]
+ else:
+ category['top'] = candidates
+
+ def _record_sharding_progress(self, broker, node, error):
+ own_shard_range = broker.get_own_shard_range()
+ if (broker.get_db_state() in (UNSHARDED, SHARDING) and
+ own_shard_range.state in (ShardRange.SHARDING,
+ ShardRange.SHARDED)):
+ info = self._make_stats_info(broker, node, own_shard_range)
+ info['state'] = own_shard_range.state_text
+ info['db_state'] = broker.get_db_state()
+ states = [ShardRange.FOUND, ShardRange.CREATED,
+ ShardRange.CLEAVED, ShardRange.ACTIVE]
+ shard_ranges = broker.get_shard_ranges(states=states)
+ state_count = {}
+ for state in states:
+ state_count[ShardRange.STATES[state]] = 0
+ for shard_range in shard_ranges:
+ state_count[shard_range.state_text] += 1
+ info.update(state_count)
+ info['error'] = error and str(error)
+ self._append_stat('sharding_in_progress', 'all', info)
+
+ def _report_stats(self):
+ # report accumulated stats since start of one sharder cycle
+ default_stats = ('attempted', 'success', 'failure')
+ category_keys = (
+ ('visited', default_stats + ('skipped', 'completed')),
+ ('scanned', default_stats + ('found', 'min_time', 'max_time')),
+ ('created', default_stats),
+ ('cleaved', default_stats + ('min_time', 'max_time',)),
+ ('misplaced', default_stats + ('found', 'placed', 'unplaced')),
+ ('audit_root', default_stats),
+ ('audit_shard', default_stats),
+ )
+
+ now = time.time()
+ last_report = time.ctime(self.stats['start'])
+ elapsed = now - self.stats['start']
+ sharding_stats = self.stats['sharding']
+ for category, keys in category_keys:
+ stats = sharding_stats[category]
+ msg = ' '.join(['%s:%s' % (k, str(stats[k])) for k in keys])
+ self.logger.info('Since %s %s - %s', last_report, category, msg)
+
+ self._transform_sharding_candidate_stats()
+
+ dump_recon_cache(
+ {'sharding_stats': self.stats,
+ 'sharding_time': elapsed,
+ 'sharding_last': now},
+ self.rcache, self.logger)
+ self.reported = now
+
+ def _periodic_report_stats(self):
+ if (time.time() - self.reported) >= 3600: # once an hour
+ self._report_stats()
+
+ def _check_node(self, node):
+ if not node:
+ return False
+ if not is_local_device(self.ips, self.port,
+ node['replication_ip'],
+ node['replication_port']):
+ return False
+ if not check_drive(self.root, node['device'],
+ self.mount_check):
+ self.logger.warning(
+ 'Skipping %(device)s as it is not mounted' % node)
+ return False
+ return True
+
+ def _fetch_shard_ranges(self, broker, newest=False, params=None,
+ include_deleted=False):
+ path = self.int_client.make_path(broker.root_account,
+ broker.root_container)
+ params = params or {}
+ params.setdefault('format', 'json')
+ headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Backend-Override-Deleted': 'true',
+ 'X-Backend-Include-Deleted': str(include_deleted)}
+ if newest:
+ headers['X-Newest'] = 'true'
+ try:
+ try:
+ resp = self.int_client.make_request(
+ 'GET', path, headers, acceptable_statuses=(2,),
+ params=params)
+ except internal_client.UnexpectedResponse as err:
+ self.logger.warning("Failed to get shard ranges from %s: %s",
+ broker.root_path, err)
+ return None
+ record_type = resp.headers.get('x-backend-record-type')
+ if record_type != 'shard':
+ err = 'unexpected record type %r' % record_type
+ self.logger.error("Failed to get shard ranges from %s: %s",
+ broker.root_path, err)
+ return None
+
+ try:
+ data = json.loads(resp.body)
+ if not isinstance(data, list):
+ raise ValueError('not a list')
+ return [ShardRange.from_dict(shard_range)
+ for shard_range in data]
+ except (ValueError, TypeError, KeyError) as err:
+ self.logger.error(
+ "Failed to get shard ranges from %s: invalid data: %r",
+ broker.root_path, err)
+ return None
+ finally:
+ self.logger.txn_id = None
+
+ def _put_container(self, node, part, account, container, headers, body):
+ try:
+ direct_put_container(node, part, account, container,
+ conn_timeout=self.conn_timeout,
+ response_timeout=self.node_timeout,
+ headers=headers, contents=body)
+ except DirectClientException as err:
+ self.logger.warning(
+ 'Failed to put shard ranges to %s:%s/%s: %s',
+ node['ip'], node['port'], node['device'], err.http_status)
+ except (Exception, Timeout) as err:
+ self.logger.exception(
+ 'Failed to put shard ranges to %s:%s/%s: %s',
+ node['ip'], node['port'], node['device'], err)
+ else:
+ return True
+ return False
+
+ def _send_shard_ranges(self, account, container, shard_ranges,
+ headers=None):
+ body = json.dumps([dict(sr) for sr in shard_ranges])
+ part, nodes = self.ring.get_nodes(account, container)
+ headers = headers or {}
+ headers.update({'X-Backend-Record-Type': RECORD_TYPE_SHARD,
+ 'User-Agent': 'container-sharder %s' % os.getpid(),
+ 'X-Timestamp': Timestamp.now().normal,
+ 'Content-Length': len(body),
+ 'Content-Type': 'application/json'})
+
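+ # send the serialized shard ranges to every replica of the target
+ # container in parallel; the update is only treated as successful if a
+ # quorum of replicas accept it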
+ pool = GreenAsyncPile(len(nodes))
+ for node in nodes:
+ pool.spawn(self._put_container, node, part, account,
+ container, headers, body)
+
+ results = pool.waitall(None)
+ return results.count(True) >= quorum_size(self.ring.replica_count)
+
+ def _get_shard_broker(self, shard_range, root_path, policy_index):
+ """
+ Get a broker for a container db for the given shard range. If one of
+ the shard container's primary nodes is a local device then that will be
+ chosen for the db, otherwise the first of the shard container's handoff
+ nodes that is local will be chosen.
+
+ :param shard_range: a :class:`~swift.common.utils.ShardRange`
+ :param root_path: the path of the shard's root container
+ :param policy_index: the storage policy index
+ :returns: a tuple of ``(part, broker, node_id)`` where ``part`` is the
+ shard container's partition, ``broker`` is an instance of
+ :class:`~swift.container.backend.ContainerBroker`,
+ ``node_id`` is the id of the selected node.
+ """
+ part = self.ring.get_part(shard_range.account, shard_range.container)
+ node = self.find_local_handoff_for_part(part)
+ if not node:
+ raise DeviceUnavailable(
+ 'No mounted devices found suitable for creating shard broker '
+ 'for %s in partition %s' % (shard_range.name, part))
+
+ shard_broker = ContainerBroker.create_broker(
+ os.path.join(self.root, node['device']), part, shard_range.account,
+ shard_range.container, epoch=shard_range.epoch,
+ storage_policy_index=policy_index)
+
+ # Get the valid info into the broker.container, etc
+ shard_broker.get_info()
+ shard_broker.merge_shard_ranges(shard_range)
+ shard_broker.set_sharding_sysmeta('Root', root_path)
+ shard_broker.update_metadata({
+ 'X-Container-Sysmeta-Sharding':
+ ('True', Timestamp.now().internal)})
+
+ return part, shard_broker, node['id']
+
+ def _audit_root_container(self, broker):
+ # This is the root container, and therefore the tome of knowledge,
+ # all we can do is check there is nothing screwy with the ranges
+ self._increment_stat('audit_root', 'attempted')
+ warnings = []
+ own_shard_range = broker.get_own_shard_range()
+
+ if own_shard_range.state in (ShardRange.SHARDING, ShardRange.SHARDED):
+ shard_ranges = broker.get_shard_ranges()
+ missing_ranges = find_missing_ranges(shard_ranges)
+ if missing_ranges:
+ warnings.append(
+ 'missing range(s): %s' %
+ ' '.join(['%s-%s' % (lower, upper)
+ for lower, upper in missing_ranges]))
+
+ for state in ShardRange.STATES:
+ shard_ranges = broker.get_shard_ranges(states=state)
+ overlaps = find_overlapping_ranges(shard_ranges)
+ for overlapping_ranges in overlaps:
+ warnings.append(
+ 'overlapping ranges in state %s: %s' %
+ (ShardRange.STATES[state],
+ ' '.join(['%s-%s' % (sr.lower, sr.upper)
+ for sr in overlapping_ranges])))
+
+ if warnings:
+ self.logger.warning(
+ 'Audit failed for root %s (%s): %s' %
+ (broker.db_file, broker.path, ', '.join(warnings)))
+ self._increment_stat('audit_root', 'failure', statsd=True)
+ return False
+
+ self._increment_stat('audit_root', 'success', statsd=True)
+ return True
+
+ def _audit_shard_container(self, broker):
+ # Get the root view of the world.
+ self._increment_stat('audit_shard', 'attempted')
+ warnings = []
+ errors = []
+ if not broker.account.startswith(self.shards_account_prefix):
+ warnings.append('account not in shards namespace %r' %
+ self.shards_account_prefix)
+
+ own_shard_range = broker.get_own_shard_range(no_default=True)
+
+ shard_range = None
+ if own_shard_range:
+ shard_ranges = self._fetch_shard_ranges(
+ broker, newest=True,
+ params={'marker': own_shard_range.lower,
+ 'end_marker': own_shard_range.upper},
+ include_deleted=True)
+ if shard_ranges:
+ for shard_range in shard_ranges:
+ if (shard_range.lower == own_shard_range.lower and
+ shard_range.upper == own_shard_range.upper and
+ shard_range.name == own_shard_range.name):
+ break
+ else:
+ # this is not necessarily an error - some replicas of the
+ # root may not yet know about this shard container
+ warnings.append('root has no matching shard range')
+ shard_range = None
+ else:
+ warnings.append('unable to get shard ranges from root')
+ else:
+ errors.append('missing own shard range')
+
+ if warnings:
+ self.logger.warning(
+ 'Audit warnings for shard %s (%s): %s' %
+ (broker.db_file, broker.path, ', '.join(warnings)))
+
+ if errors:
+ self.logger.warning(
+ 'Audit failed for shard %s (%s) - skipping: %s' %
+ (broker.db_file, broker.path, ', '.join(errors)))
+ self._increment_stat('audit_shard', 'failure', statsd=True)
+ return False
+
+ if shard_range:
+ self.logger.debug('Updating shard from root %s', dict(shard_range))
+ broker.merge_shard_ranges(shard_range)
+ own_shard_range = broker.get_own_shard_range()
+ delete_age = time.time() - self.reclaim_age
+ if (own_shard_range.state == ShardRange.SHARDED and
+ own_shard_range.deleted and
+ own_shard_range.timestamp < delete_age and
+ broker.empty()):
+ broker.delete_db(Timestamp.now().internal)
+ self.logger.debug('Deleted shard container %s (%s)',
+ broker.db_file, broker.path)
+ self._increment_stat('audit_shard', 'success', statsd=True)
+ return True
+
+ def _audit_container(self, broker):
+ if broker.is_deleted():
+ # if the container has been marked as deleted, all metadata will
+ # have been erased so no point auditing. But we want it to pass, in
+ # case any objects exist inside it.
+ return True
+ if broker.is_root_container():
+ return self._audit_root_container(broker)
+ return self._audit_shard_container(broker)
+
+ def yield_objects(self, broker, src_shard_range, since_row=None):
+ """
+ Iterates through all objects in ``src_shard_range`` in name order
+ yielding them in lists of up to ``cleave_row_batch_size`` length.
+
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
+ :param src_shard_range: A :class:`~swift.common.utils.ShardRange`
+ describing the source range.
+ :param since_row: include only items whose ROWID is greater than
+ the given row id; by default all rows are included.
+ :return: a generator of tuples of (list of objects, broker info dict)
+ """
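+ # make two passes over the source range: the first yields undeleted
+ # object rows, the second yields deleted (tombstone) rows so that
+ # object deletions are also carried over to the destination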
+ for include_deleted in (False, True):
+ marker = src_shard_range.lower_str
+ while True:
+ info = broker.get_info()
+ info['max_row'] = broker.get_max_row()
+ start = time.time()
+ objects = broker.get_objects(
+ self.cleave_row_batch_size,
+ marker=marker,
+ end_marker=src_shard_range.end_marker,
+ include_deleted=include_deleted,
+ since_row=since_row)
+ if objects:
+ self.logger.debug('got %s objects from %s in %ss',
+ len(objects), broker.db_file,
+ time.time() - start)
+ yield objects, info
+
+ if len(objects) < self.cleave_row_batch_size:
+ break
+ marker = objects[-1]['name']
+
+ def yield_objects_to_shard_range(self, broker, src_shard_range,
+ dest_shard_ranges):
+ """
+ Iterates through all objects in ``src_shard_range`` to place them in
+ destination shard ranges provided by the ``dest_shard_ranges`` function.
+ Yields tuples of (object list, destination shard range in which those
+ objects belong, broker info dict). Note that the same destination shard
+ range may be referenced in more than one yielded tuple.
+
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
+ :param src_shard_range: A :class:`~swift.common.utils.ShardRange`
+ describing the source range.
+ :param dest_shard_ranges: A function which should return a list of
+ destination shard ranges in name order.
+ :return: a generator of tuples of
+ (object list, shard range, broker info dict)
+ """
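+ # objects and destination shard ranges are both iterated in name
+ # order, so a single pass is made over each: runs of objects falling
+ # in the current destination range are yielded with that range, and
+ # runs of objects that fall outside every destination range are
+ # yielded with a destination of None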
+ dest_shard_range_iter = dest_shard_range = None
+ for objs, info in self.yield_objects(broker, src_shard_range):
+ if not objs:
+ return
+
+ def next_or_none(it):
+ try:
+ return next(it)
+ except StopIteration:
+ return None
+
+ if dest_shard_range_iter is None:
+ dest_shard_range_iter = iter(dest_shard_ranges())
+ dest_shard_range = next_or_none(dest_shard_range_iter)
+
+ unplaced = False
+ last_index = next_index = 0
+ for obj in objs:
+ if dest_shard_range is None:
+ # no more destinations: yield remainder of batch and return
+ # NB there may be more batches of objects but none of them
+ # will be placed so no point fetching them
+ yield objs[last_index:], None, info
+ return
+ if obj['name'] <= dest_shard_range.lower:
+ unplaced = True
+ elif unplaced:
+ # end of run of unplaced objects, yield them
+ yield objs[last_index:next_index], None, info
+ last_index = next_index
+ unplaced = False
+ while (dest_shard_range and
+ obj['name'] > dest_shard_range.upper):
+ if next_index != last_index:
+ # yield the objects in current dest_shard_range
+ yield (objs[last_index:next_index],
+ dest_shard_range,
+ info)
+ last_index = next_index
+ dest_shard_range = next_or_none(dest_shard_range_iter)
+ next_index += 1
+
+ if next_index != last_index:
+ # yield tail of current batch of objects
+ # NB there may be more objects for the current
+ # dest_shard_range in the next batch from yield_objects
+ yield (objs[last_index:next_index],
+ None if unplaced else dest_shard_range,
+ info)
+
+ def _post_replicate_hook(self, broker, info, responses):
+ # override superclass behaviour
+ pass
+
+ def _replicate_and_delete(self, broker, dest_shard_range, part,
+ dest_broker, node_id, info):
+ success, responses = self._replicate_object(
+ part, dest_broker.db_file, node_id)
+ quorum = quorum_size(self.ring.replica_count)
+ if not success and responses.count(True) < quorum:
+ self.logger.warning(
+ 'Failed to sufficiently replicate misplaced objects: %s in %s '
+ '(not removing)', dest_shard_range, broker.path)
+ return False
+
+ if broker.get_info()['id'] != info['id']:
+ # the db changed - don't remove any objects
+ success = False
+ else:
+ # remove objects up to the max row of the db sampled prior to
+ # the first object yielded for this destination; objects added
+ # after that point may not have been yielded and replicated so
+ # it is not safe to remove them yet
+ broker.remove_objects(
+ dest_shard_range.lower_str,
+ dest_shard_range.upper_str,
+ max_row=info['max_row'])
+ success = True
+
+ if not success:
+ self.logger.warning(
+ 'Refused to remove misplaced objects: %s in %s',
+ dest_shard_range, broker.path)
+ return success
+
+ def _move_objects(self, src_broker, src_shard_range, policy_index,
+ shard_range_fetcher):
+ # move objects from src_shard_range in src_broker to destination shard
+ # ranges provided by shard_range_fetcher
+ dest_brokers = {} # map shard range -> broker
+ placed = unplaced = 0
+ success = True
+ for objs, dest_shard_range, info in self.yield_objects_to_shard_range(
+ src_broker, src_shard_range, shard_range_fetcher):
+ if not dest_shard_range:
+ unplaced += len(objs)
+ success = False
+ continue
+
+ if dest_shard_range.name == src_broker.path:
+ self.logger.debug(
+ 'Skipping source as misplaced objects destination')
+ # in shrinking context, the misplaced objects might actually be
+ # correctly placed if the root has expanded this shard but this
+ # broker has not yet been updated
+ continue
+
+ if dest_shard_range not in dest_brokers:
+ part, dest_broker, node_id = self._get_shard_broker(
+ dest_shard_range, src_broker.root_path, policy_index)
+ # save the broker info that was sampled prior to the *first*
+ # yielded objects for this destination
+ destination = {'part': part,
+ 'dest_broker': dest_broker,
+ 'node_id': node_id,
+ 'info': info}
+ dest_brokers[dest_shard_range] = destination
+ else:
+ destination = dest_brokers[dest_shard_range]
+ destination['dest_broker'].merge_items(objs)
+ placed += len(objs)
+
+ if unplaced:
+ self.logger.warning(
+ 'Failed to find destination for at least %s misplaced objects '
+ 'in %s' % (unplaced, src_broker.path))
+
+ # TODO: consider executing the replication jobs concurrently
+ for dest_shard_range, dest_args in dest_brokers.items():
+ self.logger.debug('moving misplaced objects found in range %s' %
+ dest_shard_range)
+ success &= self._replicate_and_delete(
+ src_broker, dest_shard_range, **dest_args)
+
+ self._increment_stat('misplaced', 'placed', step=placed)
+ self._increment_stat('misplaced', 'unplaced', step=unplaced)
+ return success, placed + unplaced
+
+ def _make_shard_range_fetcher(self, broker, src_shard_range):
+ # returns a function that will lazy load shard ranges on demand;
+ # this means only one lookup is made for all misplaced ranges.
+ outer = {}
+
+ def shard_range_fetcher():
+ if not outer:
+ if broker.is_root_container():
+ ranges = broker.get_shard_ranges(
+ marker=src_shard_range.lower_str,
+ end_marker=src_shard_range.end_marker,
+ states=SHARD_UPDATE_STATES)
+ else:
+ # TODO: the root may not yet know about shard ranges to
+ # which a shard is sharding, but those could come from
+ # the broker
+ ranges = self._fetch_shard_ranges(
+ broker, newest=True,
+ params={'states': 'updating',
+ 'marker': src_shard_range.lower_str,
+ 'end_marker': src_shard_range.end_marker})
+ outer['ranges'] = iter(ranges)
+ return outer['ranges']
+ return shard_range_fetcher
+
+ def _make_default_misplaced_object_bounds(self, broker):
+ # Objects outside of this container's own range are misplaced.
+ own_shard_range = broker.get_own_shard_range()
+ bounds = []
+ if own_shard_range.lower:
+ bounds.append(('', own_shard_range.lower))
+ if own_shard_range.upper:
+ bounds.append((own_shard_range.upper, ''))
+ return bounds
+
+ def _make_misplaced_object_bounds(self, broker):
+ bounds = []
+ state = broker.get_db_state()
+ if state == SHARDED:
+ # Anything in the object table is treated as a misplaced object.
+ bounds.append(('', ''))
+
+ if not bounds and state == SHARDING:
+ # Objects outside of this container's own range are misplaced.
+ # Objects in already cleaved shard ranges are also misplaced.
+ cleave_context = CleavingContext.load(broker)
+ if cleave_context.cursor:
+ bounds.append(('', cleave_context.cursor))
+ own_shard_range = broker.get_own_shard_range()
+ if own_shard_range.upper:
+ bounds.append((own_shard_range.upper, ''))
+
+ return bounds or self._make_default_misplaced_object_bounds(broker)
+
+ def _move_misplaced_objects(self, broker, src_broker=None,
+ src_bounds=None):
+ """
+ Search for objects in the given broker that do not belong in that
+ broker's namespace and move those objects to their correct shard
+ container.
+
+ :param broker: An instance of :class:`swift.container.ContainerBroker`.
+ :param src_broker: optional alternative broker to use as the source
+ of misplaced objects; if not specified then ``broker`` is used as
+ the source.
+ :param src_bounds: optional list of (lower, upper) namespace bounds to
+ use when searching for misplaced objects
+ :return: True if all misplaced objects were sufficiently replicated to
+ their correct shard containers, False otherwise
+ """
+ self.logger.debug('Looking for misplaced objects in %s (%s)',
+ broker.path.decode('utf-8'), broker.db_file)
+ self._increment_stat('misplaced', 'attempted')
+ src_broker = src_broker or broker
+ if src_bounds is None:
+ src_bounds = self._make_misplaced_object_bounds(broker)
+ # (ab)use ShardRange instances to encapsulate source namespaces
+ src_ranges = [ShardRange('dont/care', Timestamp.now(), lower, upper)
+ for lower, upper in src_bounds]
+ self.logger.debug('misplaced object source bounds %s' % src_bounds)
+ policy_index = broker.storage_policy_index
+ success = True
+ num_found = 0
+ for src_shard_range in src_ranges:
+ part_success, part_num_found = self._move_objects(
+ src_broker, src_shard_range, policy_index,
+ self._make_shard_range_fetcher(broker, src_shard_range))
+ success &= part_success
+ num_found += part_num_found
+
+ if num_found:
+ self._increment_stat('misplaced', 'found', statsd=True)
+ self.logger.debug('Moved %s misplaced objects' % num_found)
+ self._increment_stat('misplaced', 'success' if success else 'failure')
+ self.logger.debug('Finished handling misplaced objects')
+ return success
+
+ def _find_shard_ranges(self, broker):
+ """
+ Scans the container to find shard ranges and adds them to the shard
+ ranges table. If there are existing shard ranges then scanning starts
+ from the upper bound of the uppermost existing shard range.
+
+ :param broker: An instance of :class:`swift.container.ContainerBroker`
+ :return: the number of shard ranges found; this is zero if the scan
+ has already completed or if no shard ranges could be found.
+ """
+ own_shard_range = broker.get_own_shard_range()
+ shard_ranges = broker.get_shard_ranges()
+ if shard_ranges and shard_ranges[-1].upper >= own_shard_range.upper:
+ self.logger.debug('Scan already completed for %s', broker.path)
+ return 0
+
+ self.logger.info('Starting scan for shard ranges on %s', broker.path)
+ self._increment_stat('scanned', 'attempted')
+
+ start = time.time()
+ shard_data, last_found = broker.find_shard_ranges(
+ self.split_size, limit=self.scanner_batch_size,
+ existing_ranges=shard_ranges)
+ elapsed = time.time() - start
+
+ if not shard_data:
+ if last_found:
+ self.logger.info("Already found all shard ranges")
+ self._increment_stat('scanned', 'success', statsd=True)
+ else:
+ # we didn't find anything
+ self.logger.warning("No shard ranges found")
+ self._increment_stat('scanned', 'failure', statsd=True)
+ return 0
+
+ shard_ranges = make_shard_ranges(
+ broker, shard_data, self.shards_account_prefix)
+ broker.merge_shard_ranges(shard_ranges)
+ num_found = len(shard_ranges)
+ self.logger.info(
+ "Completed scan for shard ranges: %d found", num_found)
+ self._increment_stat('scanned', 'found', step=num_found)
+ self._min_stat('scanned', 'min_time', round(elapsed / num_found, 3))
+ self._max_stat('scanned', 'max_time', round(elapsed / num_found, 3))
+
+ if last_found:
+ self.logger.info("Final shard range reached.")
+ self._increment_stat('scanned', 'success', statsd=True)
+ return num_found
+
+ def _create_shard_containers(self, broker):
+ # Create shard containers that are ready to receive redirected object
+ # updates. Do this now, so that redirection can begin immediately
+ # without waiting for cleaving to complete.
+ found_ranges = broker.get_shard_ranges(states=ShardRange.FOUND)
+ created_ranges = []
+ for shard_range in found_ranges:
+ self._increment_stat('created', 'attempted')
+ shard_range.update_state(ShardRange.CREATED)
+ headers = {
+ 'X-Backend-Storage-Policy-Index': broker.storage_policy_index,
+ 'X-Container-Sysmeta-Shard-Root': broker.root_path,
+ 'X-Container-Sysmeta-Sharding': True}
+ success = self._send_shard_ranges(
+ shard_range.account, shard_range.container,
+ [shard_range], headers=headers)
+ if success:
+ self.logger.debug('PUT new shard range container for %s',
+ shard_range)
+ self._increment_stat('created', 'success', statsd=True)
+ else:
+ self.logger.error(
+ 'PUT of new shard container %r failed for %s.',
+ shard_range, broker.path)
+ self._increment_stat('created', 'failure', statsd=True)
+ # break, not continue, because elsewhere it is assumed that
+ # finding and cleaving shard ranges progresses linearly, so we
+ # do not want any subsequent shard ranges to be in created
+ # state while this one is still in found state
+ break
+ created_ranges.append(shard_range)
+
+ if created_ranges:
+ broker.merge_shard_ranges(created_ranges)
+ if not broker.is_root_container():
+ self._send_shard_ranges(
+ broker.root_account, broker.root_container, created_ranges)
+ self.logger.info(
+ "Completed creating shard range containers: %d created.",
+ len(created_ranges))
+ return len(created_ranges)
+
+ def _cleave_shard_range(self, broker, cleaving_context, shard_range):
+ self.logger.info("Cleaving '%s' from row %s into %s for %r",
+ broker.path, cleaving_context.last_cleave_to_row,
+ shard_range.name, shard_range)
+ self._increment_stat('cleaved', 'attempted')
+ start = time.time()
+ policy_index = broker.storage_policy_index
+ try:
+ shard_part, shard_broker, node_id = self._get_shard_broker(
+ shard_range, broker.root_path, policy_index)
+ except DeviceUnavailable as duex:
+ self.logger.warning(str(duex))
+ self._increment_stat('cleaved', 'failure', statsd=True)
+ return False
+
+ # only cleave from the retiring db - misplaced objects handler will
+ # deal with any objects in the fresh db
+ source_broker = broker.get_brokers()[0]
+ # if this range has been cleaved before but replication
+ # failed then the shard db may still exist and it may not be
+ # necessary to merge all the rows again
+ source_db_id = source_broker.get_info()['id']
+ source_max_row = source_broker.get_max_row()
+ sync_point = shard_broker.get_sync(source_db_id)
+ if sync_point < source_max_row:
+ sync_from_row = max(cleaving_context.last_cleave_to_row,
+ sync_point)
+ for objects, info in self.yield_objects(
+ source_broker, shard_range,
+ since_row=sync_from_row):
+ shard_broker.merge_items(objects)
+ # Note: the max row stored as a sync point is sampled *before*
+ # objects are yielded to ensure that it is less than or equal to
+ # the last yielded row. Other sync points are also copied from the
+ # source broker to the shards; if another replica of the source
+ # happens to subsequently cleave into a primary replica of the
+ # shard then it will only need to cleave rows after its last sync
+ # point with this replica of the source broker.
+ shard_broker.merge_syncs(
+ [{'sync_point': source_max_row, 'remote_id': source_db_id}] +
+ source_broker.get_syncs())
+ else:
+ self.logger.debug("Cleaving '%s': %r - shard db already in sync",
+ broker.path, shard_range)
+
+ own_shard_range = broker.get_own_shard_range()
+
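+ # choose how many successful replications are required before the
+ # cleave cursor may advance: a shard being cleaved for the first time
+ # (still in CREATED state) uses shard_replication_quorum, otherwise
+ # existing_shard_replication_quorum applies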
+ replication_quorum = self.existing_shard_replication_quorum
+ if shard_range.includes(own_shard_range):
+ # When shrinking, include deleted own (donor) shard range in
+ # the replicated db so that when acceptor next updates root it
+ # will atomically update its namespace *and* delete the donor.
+ # Don't do this when sharding a shard because the donor
+ # namespace should not be deleted until all shards are cleaved.
+ if own_shard_range.update_state(ShardRange.SHARDED):
+ own_shard_range.set_deleted()
+ broker.merge_shard_ranges(own_shard_range)
+ shard_broker.merge_shard_ranges(own_shard_range)
+ elif shard_range.state == ShardRange.CREATED:
+ # The shard range object stats may have changed since the shard
+ # range was found, so update with stats of objects actually
+ # copied to the shard broker. Only do this the first time each
+ # shard range is cleaved.
+ info = shard_broker.get_info()
+ shard_range.update_meta(
+ info['object_count'], info['bytes_used'])
+ shard_range.update_state(ShardRange.CLEAVED)
+ shard_broker.merge_shard_ranges(shard_range)
+ replication_quorum = self.shard_replication_quorum
+
+ self.logger.info(
+ 'Replicating new shard container %s for %s',
+ shard_broker.path, shard_broker.get_own_shard_range())
+
+ success, responses = self._replicate_object(
+ shard_part, shard_broker.db_file, node_id)
+
+ replication_successes = responses.count(True)
+ if (not success and (not responses or
+ replication_successes < replication_quorum)):
+ # insufficient replication or replication not even attempted;
+ # break because we don't want to progress the cleave cursor
+ # until each shard range has been successfully cleaved
+ self.logger.warning(
+ 'Failed to sufficiently replicate cleaved shard %s for %s: '
+ '%s successes, %s required.', shard_range, broker.path,
+ replication_successes, replication_quorum)
+ self._increment_stat('cleaved', 'failure', statsd=True)
+ return False
+
+ elapsed = round(time.time() - start, 3)
+ self._min_stat('cleaved', 'min_time', elapsed)
+ self._max_stat('cleaved', 'max_time', elapsed)
+ broker.merge_shard_ranges(shard_range)
+ cleaving_context.cursor = shard_range.upper_str
+ cleaving_context.ranges_done += 1
+ cleaving_context.ranges_todo -= 1
+ if shard_range.upper >= own_shard_range.upper:
+ # cleaving complete
+ cleaving_context.cleaving_done = True
+ cleaving_context.store(broker)
+ self.logger.info(
+ 'Cleaved %s for shard range %s in %gs.',
+ broker.path, shard_range, elapsed)
+ self._increment_stat('cleaved', 'success', statsd=True)
+ return True
+
+ def _cleave(self, broker):
+ # Returns True if misplaced objects have been moved and the entire
+ # container namespace has been successfully cleaved, False otherwise
+ if broker.is_sharded():
+ self.logger.debug('Passing over already sharded container %s/%s',
+ broker.account, broker.container)
+ return True
+
+ cleaving_context = CleavingContext.load(broker)
+ if not cleaving_context.misplaced_done:
+ # ensure any misplaced objects in the source broker are moved; note
+ # that this invocation of _move_misplaced_objects is targeted at
+ # the *retiring* db.
+ self.logger.debug(
+ 'Moving any misplaced objects from sharding container: %s',
+ broker.path)
+ bounds = self._make_default_misplaced_object_bounds(broker)
+ cleaving_context.misplaced_done = self._move_misplaced_objects(
+ broker, src_broker=broker.get_brokers()[0],
+ src_bounds=bounds)
+ cleaving_context.store(broker)
+
+ if cleaving_context.cleaving_done:
+ self.logger.debug('Cleaving already complete for container %s',
+ broker.path)
+ return cleaving_context.misplaced_done
+
+ ranges_todo = broker.get_shard_ranges(marker=cleaving_context.marker)
+ if cleaving_context.cursor:
+ # always update ranges_todo in case more ranges have been found
+ # since last visit
+ cleaving_context.ranges_todo = len(ranges_todo)
+ self.logger.debug('Continuing to cleave (%s done, %s todo): %s',
+ cleaving_context.ranges_done,
+ cleaving_context.ranges_todo,
+ broker.path)
+ else:
+ cleaving_context.start()
+ cleaving_context.ranges_todo = len(ranges_todo)
+ self.logger.debug('Starting to cleave (%s todo): %s',
+ cleaving_context.ranges_todo, broker.path)
+
+ ranges_done = []
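+ # cleave at most cleave_batch_size shard ranges on this visit,
+ # stopping early at the first range that is not yet in a cleavable
+ # state or that fails to cleave, so that cleaving progresses through
+ # the namespace in order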
+ for shard_range in ranges_todo[:self.cleave_batch_size]:
+ if shard_range.state == ShardRange.FOUND:
+ break
+ elif shard_range.state in (ShardRange.CREATED,
+ ShardRange.CLEAVED,
+ ShardRange.ACTIVE):
+ if self._cleave_shard_range(
+ broker, cleaving_context, shard_range):
+ ranges_done.append(shard_range)
+ else:
+ break
+ else:
+ self.logger.warning('Unexpected shard range state for cleave: %s',
+ shard_range.state)
+ break
+
+ if not ranges_done:
+ cleaving_context.store(broker)
+ self.logger.debug(
+ 'Cleaved %s shard ranges for %s', len(ranges_done), broker.path)
+ return (cleaving_context.misplaced_done and
+ cleaving_context.cleaving_done)
+
+ def _complete_sharding(self, broker):
+ cleaving_context = CleavingContext.load(broker)
+ if cleaving_context.done():
+ # Move all CLEAVED shards to ACTIVE state and if a shard then
+ # delete own shard range; these changes will be simultaneously
+ # reported in the next update to the root container.
+ modified_shard_ranges = broker.get_shard_ranges(
+ states=ShardRange.CLEAVED)
+ for sr in modified_shard_ranges:
+ sr.update_state(ShardRange.ACTIVE)
+ own_shard_range = broker.get_own_shard_range()
+ own_shard_range.update_state(ShardRange.SHARDED)
+ own_shard_range.update_meta(0, 0)
+ if (not broker.is_root_container() and not
+ own_shard_range.deleted):
+ own_shard_range = own_shard_range.copy(
+ timestamp=Timestamp.now(), deleted=1)
+ modified_shard_ranges.append(own_shard_range)
+ broker.merge_shard_ranges(modified_shard_ranges)
+ if broker.set_sharded_state():
+ return True
+ else:
+ self.logger.warning(
+ 'Failed to remove retiring db file for %s',
+ broker.path)
+ else:
+ self.logger.warning(
+ 'Repeat cleaving required for %r with context: %s'
+ % (broker.db_files[0], dict(cleaving_context)))
+ cleaving_context.reset()
+ cleaving_context.store(broker)
+
+ return False
+
+ def _find_and_enable_sharding_candidates(self, broker, shard_ranges=None):
+ candidates = find_sharding_candidates(
+ broker, self.shard_container_threshold, shard_ranges)
+ if candidates:
+ self.logger.debug('Identified %s sharding candidates'
+ % len(candidates))
+ broker.merge_shard_ranges(candidates)
+
+ def _find_and_enable_shrinking_candidates(self, broker):
+ if not broker.is_sharded():
+ self.logger.warning('Cannot shrink a not yet sharded container %s',
+ broker.path)
+ return
+
+ merge_pairs = find_shrinking_candidates(
+ broker, self.shrink_size, self.merge_size)
+ self.logger.debug('Found %s shrinking candidates' % len(merge_pairs))
+ own_shard_range = broker.get_own_shard_range()
+ for acceptor, donor in merge_pairs.items():
+ self.logger.debug('shrinking shard range %s into %s in %s' %
+ (donor, acceptor, broker.db_file))
+ broker.merge_shard_ranges([acceptor, donor])
+ if acceptor.name != own_shard_range.name:
+ self._send_shard_ranges(
+ acceptor.account, acceptor.container, [acceptor])
+ acceptor.increment_meta(donor.object_count, donor.bytes_used)
+ else:
+ # no need to change namespace or stats
+ acceptor.update_state(ShardRange.ACTIVE,
+ state_timestamp=Timestamp.now())
+ # Now send a copy of the expanded acceptor, with an updated
+ # timestamp, to the donor container. This forces the donor to
+ # asynchronously cleave its entire contents to the acceptor and
+ # delete itself. The donor will pass its own deleted shard range to
+ # the acceptor when cleaving. Subsequent updates from the donor or
+ # the acceptor will then update the root to have the deleted donor
+ # shard range.
+ self._send_shard_ranges(
+ donor.account, donor.container, [donor, acceptor])
+
+ def _update_root_container(self, broker):
+ own_shard_range = broker.get_own_shard_range(no_default=True)
+ if not own_shard_range:
+ return
+
+ # persist the reported shard metadata
+ broker.merge_shard_ranges(own_shard_range)
+ # now get a consistent list of own and other shard ranges
+ shard_ranges = broker.get_shard_ranges(
+ include_own=True,
+ include_deleted=True)
+ # send everything
+ self._send_shard_ranges(
+ broker.root_account, broker.root_container,
+ shard_ranges)
+
+ def _process_broker(self, broker, node, part):
+ broker.get_info() # make sure account/container are populated
+ state = broker.get_db_state()
+ self.logger.debug('Starting processing %s state %s',
+ broker.path, state)
+
+ if not self._audit_container(broker):
+ return
+
+ # now look and deal with misplaced objects.
+ self._move_misplaced_objects(broker)
+
+ if broker.is_deleted():
+ # This container is deleted so we can skip it. We still want
+ # deleted containers to go via misplaced items because they may
+ # have new objects sitting in them that may need to move.
+ return
+
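+ # only the primary node with index 0 acts as the sharding leader: it
+ # may initiate sharding, scan for shard ranges and enable shrinking;
+ # other nodes act only on shard ranges that have been replicated or
+ # sent to them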
+ is_leader = node['index'] == 0 and self.auto_shard
+ if state in (UNSHARDED, COLLAPSED):
+ if is_leader and broker.is_root_container():
+ # bootstrap sharding of root container
+ self._find_and_enable_sharding_candidates(
+ broker, shard_ranges=[broker.get_own_shard_range()])
+
+ own_shard_range = broker.get_own_shard_range()
+ if own_shard_range.state in (ShardRange.SHARDING,
+ ShardRange.SHRINKING,
+ ShardRange.SHARDED):
+ if broker.get_shard_ranges():
+ # container has been given shard ranges rather than
+ # found them e.g. via replication or a shrink event
+ if broker.set_sharding_state():
+ state = SHARDING
+ elif is_leader:
+ if broker.set_sharding_state():
+ state = SHARDING
+ else:
+ self.logger.debug(
+ 'Own shard range in state %r but no shard ranges '
+ 'and not leader; remaining unsharded: %s'
+ % (own_shard_range.state_text, broker.path))
+
+ if state == SHARDING:
+ if is_leader:
+ num_found = self._find_shard_ranges(broker)
+ else:
+ num_found = 0
+
+ # create shard containers for newly found ranges
+ num_created = self._create_shard_containers(broker)
+
+ if num_found or num_created:
+ # share updated shard range state with other nodes
+ self._replicate_object(part, broker.db_file, node['id'])
+
+ # always try to cleave any pending shard ranges
+ cleave_complete = self._cleave(broker)
+
+ if cleave_complete:
+ self.logger.info('Completed cleaving of %s', broker.path)
+ if self._complete_sharding(broker):
+ state = SHARDED
+ self._increment_stat('visited', 'completed', statsd=True)
+ else:
+ self.logger.debug('Remaining in sharding state %s',
+ broker.path)
+
+ if state == SHARDED and broker.is_root_container():
+ if is_leader:
+ self._find_and_enable_shrinking_candidates(broker)
+ self._find_and_enable_sharding_candidates(broker)
+ for shard_range in broker.get_shard_ranges(
+ states=[ShardRange.SHARDING]):
+ self._send_shard_ranges(
+ shard_range.account, shard_range.container,
+ [shard_range])
+
+ if not broker.is_root_container():
+ # Update the root container with this container's shard range
+ # info; do this even when sharded in case previous attempts
+ # failed; don't do this if there is no own shard range. When
+ # sharding a shard, this is when the root will see the new
+ # shards move to ACTIVE state and the sharded shard
+ # simultaneously become deleted.
+ self._update_root_container(broker)
+
+ self.logger.debug('Finished processing %s/%s state %s',
+ broker.account, broker.container,
+ broker.get_db_state())
+
+ def _one_shard_cycle(self, devices_to_shard, partitions_to_shard):
+ """
+ The main function; everything the sharder does forks from this method.
+
+ The sharder loops through each container with sharding enabled and each
+ sharded container on the server; for each container it:
+ - audits the container
+ - checks and deals with misplaced items
+ - cleaves any shard ranges as required
+ - if not a root container, reports shard range stats to the root
+ container
+ """
+ self.logger.info('Container sharder cycle starting, auto-sharding %s',
+ self.auto_shard)
+ if isinstance(devices_to_shard, (list, tuple)):
+ self.logger.info('(Override devices: %s)',
+ ', '.join(str(d) for d in devices_to_shard))
+ if isinstance(partitions_to_shard, (list, tuple)):
+ self.logger.info('(Override partitions: %s)',
+ ', '.join(str(p) for p in partitions_to_shard))
+ self._zero_stats()
+ self._local_device_ids = set()
+ dirs = []
+ self.ips = whataremyips(bind_ip=self.bind_ip)
+ for node in self.ring.devs:
+ if not self._check_node(node):
+ continue
+ datadir = os.path.join(self.root, node['device'], self.datadir)
+ if os.path.isdir(datadir):
+ # Populate self._local_device_ids so we can find devices for
+ # shard containers later
+ self._local_device_ids.add(node['id'])
+ if node['device'] not in devices_to_shard:
+ continue
+ part_filt = self._partition_dir_filter(
+ node['id'],
+ partitions_to_shard)
+ dirs.append((datadir, node, part_filt))
+ if not dirs:
+ self.logger.warning('Found no data dirs!')
+ for part, path, node in db_replicator.roundrobin_datadirs(dirs):
+ # NB: get_part_nodes always provides an 'index' key;
+ # this will be used in leader selection
+ for primary in self.ring.get_part_nodes(int(part)):
+ if node['id'] == primary['id']:
+ node = primary
+ break
+ else:
+ # Set index such that we'll *never* be selected as a leader
+ node['index'] = 'handoff'
+
+ broker = ContainerBroker(path, logger=self.logger,
+ timeout=self.broker_timeout)
+ error = None
+ try:
+ self._identify_sharding_candidate(broker, node)
+ if sharding_enabled(broker):
+ self._increment_stat('visited', 'attempted')
+ self._process_broker(broker, node, part)
+ self._increment_stat('visited', 'success', statsd=True)
+ else:
+ self._increment_stat('visited', 'skipped')
+ except (Exception, Timeout) as err:
+ self._increment_stat('visited', 'failure', statsd=True)
+ self.logger.exception(
+ 'Unhandled exception while processing %s: %s', path, err)
+ # on Python 3 the name bound by 'except ... as' is cleared when
+ # the except block exits, so keep a reference for the progress
+ # report below
+ error = err
+ try:
+ self._record_sharding_progress(broker, node, error)
+ except (Exception, Timeout) as error:
+ self.logger.exception(
+ 'Unhandled exception while dumping progress for %s: %s',
+ path, error)
+ self._periodic_report_stats()
+
+ self._report_stats()
+
+ def run_forever(self, *args, **kwargs):
+ """Run the container sharder until stopped."""
+ self.reported = time.time()
+ time.sleep(random() * self.interval)
+ while True:
+ begin = time.time()
+ try:
+ self._one_shard_cycle(devices_to_shard=Everything(),
+ partitions_to_shard=Everything())
+ except (Exception, Timeout):
+ self.logger.increment('errors')
+ self.logger.exception('Exception in sharder')
+ elapsed = time.time() - begin
+ self.logger.info(
+ 'Container sharder cycle completed: %.02fs', elapsed)
+ if elapsed < self.interval:
+ time.sleep(self.interval - elapsed)
+
+ def run_once(self, *args, **kwargs):
+ """Run the container sharder once."""
+ self.logger.info('Begin container sharder "once" mode')
+ override_options = parse_override_options(once=True, **kwargs)
+ devices_to_shard = override_options.devices or Everything()
+ partitions_to_shard = override_options.partitions or Everything()
+ begin = self.reported = time.time()
+ self._one_shard_cycle(devices_to_shard=devices_to_shard,
+ partitions_to_shard=partitions_to_shard)
+ elapsed = time.time() - begin
+ self.logger.info(
+ 'Container sharder "once" mode completed: %.02fs', elapsed)
diff --git a/swift/obj/server.py b/swift/obj/server.py
index 36bd758d3f..2f584bb319 100644
--- a/swift/obj/server.py
+++ b/swift/obj/server.py
@@ -35,7 +35,7 @@ from swift.common.utils import public, get_logger, \
normalize_delete_at_timestamp, get_log_line, Timestamp, \
get_expirer_container, parse_mime_headers, \
iter_multipart_mime_documents, extract_swift_bytes, safe_json_loads, \
- config_auto_int_value
+ config_auto_int_value, split_path, get_redirect_data
from swift.common.bufferedhttp import http_connect
from swift.common.constraints import check_object_creation, \
valid_timestamp, check_utf8
@@ -44,7 +44,7 @@ from swift.common.exceptions import ConnectionTimeout, DiskFileQuarantined, \
DiskFileDeviceUnavailable, DiskFileExpired, ChunkReadTimeout, \
ChunkReadError, DiskFileXattrNotSupported
from swift.obj import ssync_receiver
-from swift.common.http import is_success
+from swift.common.http import is_success, HTTP_MOVED_PERMANENTLY
from swift.common.base_storage_server import BaseStorageServer
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.request_helpers import get_name_and_placement, \
@@ -245,7 +245,7 @@ class ObjectController(BaseStorageServer):
def async_update(self, op, account, container, obj, host, partition,
contdevice, headers_out, objdevice, policy,
- logger_thread_locals=None):
+ logger_thread_locals=None, container_path=None):
"""
Sends or saves an async update.
@@ -263,11 +263,21 @@ class ObjectController(BaseStorageServer):
:param logger_thread_locals: The thread local values to be set on the
self.logger to retain transaction
logging information.
+ :param container_path: optional path in the form ``<account/container>``
+ to which the update should be sent. If given this path will be used
+ instead of constructing a path from the ``account`` and
+ ``container`` params.
"""
if logger_thread_locals:
self.logger.thread_locals = logger_thread_locals
headers_out['user-agent'] = 'object-server %s' % os.getpid()
- full_path = '/%s/%s/%s' % (account, container, obj)
+ if container_path:
+ # use explicitly specified container path
+ full_path = '/%s/%s' % (container_path, obj)
+ else:
+ full_path = '/%s/%s/%s' % (account, container, obj)
+
+ redirect_data = None
if all([host, partition, contdevice]):
try:
with ConnectionTimeout(self.conn_timeout):
@@ -277,15 +287,23 @@ class ObjectController(BaseStorageServer):
with Timeout(self.node_timeout):
response = conn.getresponse()
response.read()
- if is_success(response.status):
- return
- else:
- self.logger.error(_(
- 'ERROR Container update failed '
- '(saving for async update later): %(status)d '
- 'response from %(ip)s:%(port)s/%(dev)s'),
- {'status': response.status, 'ip': ip, 'port': port,
- 'dev': contdevice})
+ if is_success(response.status):
+ return
+
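+ # a 301 response indicates the container server has redirected the
+ # update to a shard container; capture the redirect location so that
+ # the async update is saved with the shard's container path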
+ if response.status == HTTP_MOVED_PERMANENTLY:
+ try:
+ redirect_data = get_redirect_data(response)
+ except ValueError as err:
+ self.logger.error(
+ 'Container update failed for %r; problem with '
+ 'redirect location: %s' % (obj, err))
+ else:
+ self.logger.error(_(
+ 'ERROR Container update failed '
+ '(saving for async update later): %(status)d '
+ 'response from %(ip)s:%(port)s/%(dev)s'),
+ {'status': response.status, 'ip': ip, 'port': port,
+ 'dev': contdevice})
except (Exception, Timeout):
self.logger.exception(_(
'ERROR container update failed with '
@@ -293,6 +311,13 @@ class ObjectController(BaseStorageServer):
{'ip': ip, 'port': port, 'dev': contdevice})
data = {'op': op, 'account': account, 'container': container,
'obj': obj, 'headers': headers_out}
+ if redirect_data:
+ self.logger.debug(
+ 'Update to %(path)s redirected to %(redirect)s',
+ {'path': full_path, 'redirect': redirect_data[0]})
+ container_path = redirect_data[0]
+ if container_path:
+ data['container_path'] = container_path
timestamp = headers_out.get('x-meta-timestamp',
headers_out.get('x-timestamp'))
self._diskfile_router[policy].pickle_async_update(
@@ -319,6 +344,7 @@ class ObjectController(BaseStorageServer):
contdevices = [d.strip() for d in
headers_in.get('X-Container-Device', '').split(',')]
contpartition = headers_in.get('X-Container-Partition', '')
+ contpath = headers_in.get('X-Backend-Container-Path')
if len(conthosts) != len(contdevices):
# This shouldn't happen unless there's a bug in the proxy,
@@ -331,6 +357,21 @@ class ObjectController(BaseStorageServer):
'devices': headers_in.get('X-Container-Device', '')})
return
+ if contpath:
+ try:
+ # TODO: this is very late in request handling to be validating
+ # a header - if we did *not* check and the header was bad
+ # presumably the update would fail and we would fall back to an
+ # async update to the root container, which might be the best
+ # course of action rather than aborting the update altogether?
+ split_path('/' + contpath, minsegs=2, maxsegs=2)
+ except ValueError:
+ self.logger.error(
+ "Invalid X-Backend-Container-Path, should be of the form "
+ "'account/container' but got %r." % contpath)
+ # fall back to updating root container
+ contpath = None
+
if contpartition:
updates = zip(conthosts, contdevices)
else:
@@ -344,7 +385,8 @@ class ObjectController(BaseStorageServer):
gt = spawn(self.async_update, op, account, container, obj,
conthost, contpartition, contdevice, headers_out,
objdevice, policy,
- logger_thread_locals=self.logger.thread_locals)
+ logger_thread_locals=self.logger.thread_locals,
+ container_path=contpath)
update_greenthreads.append(gt)
# Wait a little bit to see if the container updates are successful.
# If we immediately return after firing off the greenthread above, then
diff --git a/swift/obj/updater.py b/swift/obj/updater.py
index df21c01d7b..febb754ce9 100644
--- a/swift/obj/updater.py
+++ b/swift/obj/updater.py
@@ -28,12 +28,14 @@ from swift.common.constraints import check_drive
from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring
from swift.common.utils import get_logger, renamer, write_pickle, \
- dump_recon_cache, config_true_value, ratelimit_sleep, eventlet_monkey_patch
+ dump_recon_cache, config_true_value, ratelimit_sleep, split_path, \
+ eventlet_monkey_patch, get_redirect_data
from swift.common.daemon import Daemon
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.storage_policy import split_policy_string, PolicyError
from swift.obj.diskfile import get_tmp_dir, ASYNCDIR_BASE
-from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR
+from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR, \
+ HTTP_MOVED_PERMANENTLY
class SweepStats(object):
@@ -41,12 +43,13 @@ class SweepStats(object):
Stats bucket for an update sweep
"""
def __init__(self, errors=0, failures=0, quarantines=0, successes=0,
- unlinks=0):
+ unlinks=0, redirects=0):
self.errors = errors
self.failures = failures
self.quarantines = quarantines
self.successes = successes
self.unlinks = unlinks
+ self.redirects = redirects
def copy(self):
return type(self)(self.errors, self.failures, self.quarantines,
@@ -57,7 +60,8 @@ class SweepStats(object):
self.failures - other.failures,
self.quarantines - other.quarantines,
self.successes - other.successes,
- self.unlinks - other.unlinks)
+ self.unlinks - other.unlinks,
+ self.redirects - other.redirects)
def reset(self):
self.errors = 0
@@ -65,6 +69,7 @@ class SweepStats(object):
self.quarantines = 0
self.successes = 0
self.unlinks = 0
+ self.redirects = 0
def __str__(self):
keys = (
@@ -73,6 +78,7 @@ class SweepStats(object):
(self.quarantines, 'quarantines'),
(self.unlinks, 'unlinks'),
(self.errors, 'errors'),
+ (self.redirects, 'redirects'),
)
return ', '.join('%d %s' % pair for pair in keys)
@@ -279,7 +285,8 @@ class ObjectUpdater(Daemon):
'in %(elapsed).02fs seconds:, '
'%(successes)d successes, %(failures)d failures, '
'%(quarantines)d quarantines, '
- '%(unlinks)d unlinks, %(errors)d errors '
+ '%(unlinks)d unlinks, %(errors)d errors, '
+ '%(redirects)d redirects '
'(pid: %(pid)d)'),
{'device': device,
'elapsed': time.time() - start_time,
@@ -288,7 +295,8 @@ class ObjectUpdater(Daemon):
'failures': sweep_totals.failures,
'quarantines': sweep_totals.quarantines,
'unlinks': sweep_totals.unlinks,
- 'errors': sweep_totals.errors})
+ 'errors': sweep_totals.errors,
+ 'redirects': sweep_totals.redirects})
def process_object_update(self, update_path, device, policy):
"""
@@ -309,44 +317,83 @@ class ObjectUpdater(Daemon):
os.path.basename(update_path))
renamer(update_path, target_path, fsync=False)
return
- successes = update.get('successes', [])
- part, nodes = self.get_container_ring().get_nodes(
- update['account'], update['container'])
- obj = '/%s/%s/%s' % \
- (update['account'], update['container'], update['obj'])
- headers_out = HeaderKeyDict(update['headers'])
- headers_out['user-agent'] = 'object-updater %s' % os.getpid()
- headers_out.setdefault('X-Backend-Storage-Policy-Index',
- str(int(policy)))
- events = [spawn(self.object_update,
- node, part, update['op'], obj, headers_out)
- for node in nodes if node['id'] not in successes]
- success = True
- new_successes = False
- for event in events:
- event_success, node_id = event.wait()
- if event_success is True:
- successes.append(node_id)
- new_successes = True
+
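+ # perform one round of updates to the container replicas; returns a
+ # tuple of (rewrite_pickle, redirect) indicating whether the async
+ # pending file should be rewritten and whether a redirect was received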
+ def do_update():
+ successes = update.get('successes', [])
+ headers_out = HeaderKeyDict(update['headers'].copy())
+ headers_out['user-agent'] = 'object-updater %s' % os.getpid()
+ headers_out.setdefault('X-Backend-Storage-Policy-Index',
+ str(int(policy)))
+ headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
+ container_path = update.get('container_path')
+ if container_path:
+ acct, cont = split_path('/' + container_path, minsegs=2)
else:
- success = False
- if success:
- self.stats.successes += 1
- self.logger.increment('successes')
- self.logger.debug('Update sent for %(obj)s %(path)s',
- {'obj': obj, 'path': update_path})
- self.stats.unlinks += 1
- self.logger.increment('unlinks')
- os.unlink(update_path)
- else:
- self.stats.failures += 1
- self.logger.increment('failures')
- self.logger.debug('Update failed for %(obj)s %(path)s',
- {'obj': obj, 'path': update_path})
- if new_successes:
- update['successes'] = successes
- write_pickle(update, update_path, os.path.join(
- device, get_tmp_dir(policy)))
+ acct, cont = update['account'], update['container']
+ part, nodes = self.get_container_ring().get_nodes(acct, cont)
+ obj = '/%s/%s/%s' % (acct, cont, update['obj'])
+ events = [spawn(self.object_update,
+ node, part, update['op'], obj, headers_out)
+ for node in nodes if node['id'] not in successes]
+ success = True
+ new_successes = rewrite_pickle = False
+ redirect = None
+ redirects = set()
+ for event in events:
+ event_success, node_id, redirect = event.wait()
+ if event_success is True:
+ successes.append(node_id)
+ new_successes = True
+ else:
+ success = False
+ if redirect:
+ redirects.add(redirect)
+
+ if success:
+ self.stats.successes += 1
+ self.logger.increment('successes')
+ self.logger.debug('Update sent for %(obj)s %(path)s',
+ {'obj': obj, 'path': update_path})
+ self.stats.unlinks += 1
+ self.logger.increment('unlinks')
+ os.unlink(update_path)
+ elif redirects:
+ # erase any previous successes
+ update.pop('successes', None)
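+ # redirects are (path, timestamp) tuples; follow the redirect
+ # with the newest timestamp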
+ redirect = max(redirects, key=lambda x: x[-1])[0]
+ redirect_history = update.setdefault('redirect_history', [])
+ if redirect in redirect_history:
+ # force next update to be sent to root, reset history
+ update['container_path'] = None
+ update['redirect_history'] = []
+ else:
+ update['container_path'] = redirect
+ redirect_history.append(redirect)
+ self.stats.redirects += 1
+ self.logger.increment("redirects")
+ self.logger.debug(
+ 'Update redirected for %(obj)s %(path)s to %(shard)s',
+ {'obj': obj, 'path': update_path,
+ 'shard': update['container_path']})
+ rewrite_pickle = True
+ else:
+ self.stats.failures += 1
+ self.logger.increment('failures')
+ self.logger.debug('Update failed for %(obj)s %(path)s',
+ {'obj': obj, 'path': update_path})
+ if new_successes:
+ update['successes'] = successes
+ rewrite_pickle = True
+
+ return rewrite_pickle, redirect
+
+ rewrite_pickle, redirect = do_update()
+ if redirect:
+ # make one immediate retry to the redirect location
+ rewrite_pickle, redirect = do_update()
+ if rewrite_pickle:
+ write_pickle(update, update_path, os.path.join(
+ device, get_tmp_dir(policy)))
def object_update(self, node, part, op, obj, headers_out):
"""
@@ -357,7 +404,12 @@ class ObjectUpdater(Daemon):
:param op: operation performed (ex: 'PUT' or 'DELETE')
:param obj: object name being updated
:param headers_out: headers to send with the update
+ :return: a tuple of (``success``, ``node_id``, ``redirect``)
+ where ``success`` is True if the update succeeded, ``node_id`` is
+ the id of the node updated and ``redirect`` is either None or a
+ tuple of (a path, a timestamp string).
"""
+ redirect = None
try:
with ConnectionTimeout(self.conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'],
@@ -365,15 +417,24 @@ class ObjectUpdater(Daemon):
with Timeout(self.node_timeout):
resp = conn.getresponse()
resp.read()
- success = is_success(resp.status)
- if not success:
- self.logger.debug(
- _('Error code %(status)d is returned from remote '
- 'server %(ip)s: %(port)s / %(device)s'),
- {'status': resp.status, 'ip': node['ip'],
- 'port': node['port'], 'device': node['device']})
- return (success, node['id'])
+
+ if resp.status == HTTP_MOVED_PERMANENTLY:
+ try:
+ redirect = get_redirect_data(resp)
+ except ValueError as err:
+ self.logger.error(
+ 'Container update failed for %r; problem with '
+ 'redirect location: %s' % (obj, err))
+
+ success = is_success(resp.status)
+ if not success:
+ self.logger.debug(
+ _('Error code %(status)d is returned from remote '
+ 'server %(ip)s: %(port)s / %(device)s'),
+ {'status': resp.status, 'ip': node['ip'],
+ 'port': node['port'], 'device': node['device']})
+ return success, node['id'], redirect
except (Exception, Timeout):
self.logger.exception(_('ERROR with remote server '
'%(ip)s:%(port)s/%(device)s'), node)
- return HTTP_INTERNAL_SERVER_ERROR, node['id']
+ return HTTP_INTERNAL_SERVER_ERROR, node['id'], redirect
diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py
index df0ea71b89..4822b01729 100644
--- a/swift/proxy/controllers/base.py
+++ b/swift/proxy/controllers/base.py
@@ -28,6 +28,7 @@ from six.moves.urllib.parse import quote
import os
import time
+import json
import functools
import inspect
import itertools
@@ -40,11 +41,11 @@ from eventlet import sleep
from eventlet.timeout import Timeout
import six
-from swift.common.wsgi import make_pre_authed_env
+from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request
from swift.common.utils import Timestamp, config_true_value, \
public, split_path, list_from_csv, GreenthreadSafeIterator, \
GreenAsyncPile, quorum_size, parse_content_type, \
- document_iters_to_http_response_body
+ document_iters_to_http_response_body, ShardRange
from swift.common.bufferedhttp import http_connect
from swift.common import constraints
from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
@@ -188,6 +189,7 @@ def headers_to_container_info(headers, status_int=HTTP_OK):
},
'meta': meta,
'sysmeta': sysmeta,
+ 'sharding_state': headers.get('x-backend-sharding-state', 'unsharded'),
}
@@ -375,6 +377,9 @@ def get_container_info(env, app, swift_source=None):
else:
info[field] = int(info[field])
+ if info.get('sharding_state') is None:
+ info['sharding_state'] = 'unsharded'
+
return info
@@ -1994,3 +1999,91 @@ class Controller(object):
else:
raise ValueError(
"server_type can only be 'account' or 'container'")
+
+ def _get_container_listing(self, req, account, container, headers=None,
+ params=None):
+ """
+ Fetch container listing from given `account/container`.
+
+ :param req: original Request instance.
+ :param account: account in which `container` is stored.
+ :param container: container from which the listing should be fetched.
+ :param headers: headers to be included with the request
+ :param params: query string parameters to be used.
+ :return: a tuple of (deserialized json data structure, swob Response)
+ """
+ params = params or {}
+ version, _a, _c, _other = req.split_path(3, 4, True)
+ path = '/'.join(['', version, account, container])
+
+ subreq = make_pre_authed_request(
+ req.environ, method='GET', path=quote(path), headers=req.headers,
+ swift_source='SH')
+ if headers:
+ subreq.headers.update(headers)
+ subreq.params = params
+ self.app.logger.debug(
+ 'Get listing from %s %s' % (subreq.path_qs, headers))
+ response = self.app.handle_request(subreq)
+
+ if not is_success(response.status_int):
+ self.app.logger.warning(
+ 'Failed to get container listing from %s: %s',
+ subreq.path_qs, response.status_int)
+ return None, response
+
+ try:
+ data = json.loads(response.body)
+ if not isinstance(data, list):
+ raise ValueError('not a list')
+ return data, response
+ except ValueError as err:
+ self.app.logger.error(
+ 'Problem with listing response from %s: %r',
+ subreq.path_qs, err)
+ return None, response
+
+ def _get_shard_ranges(self, req, account, container, includes=None,
+ states=None):
+ """
+ Fetch shard ranges from given `account/container`. If `includes` is
+ given then the shard range for that object name is requested, otherwise
+ all shard ranges are requested.
+
+ :param req: original Request instance.
+ :param account: account from which shard ranges should be fetched.
+ :param container: container from which shard ranges should be fetched.
+ :param includes: (optional) restricts the list of fetched shard ranges
+ to those which include the given name.
+ :param states: (optional) the states of shard ranges to be fetched.
+ :return: a list of instances of :class:`swift.common.utils.ShardRange`,
+ or None if there was a problem fetching the shard ranges
+ """
+ params = req.params.copy()
+ params.pop('limit', None)
+ params['format'] = 'json'
+ if includes:
+ params['includes'] = includes
+ if states:
+ params['states'] = states
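+ # ask the container server for shard range records rather than an
+ # object listing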
+ headers = {'X-Backend-Record-Type': 'shard'}
+ listing, response = self._get_container_listing(
+ req, account, container, headers=headers, params=params)
+ if listing is None:
+ return None
+
+ record_type = response.headers.get('x-backend-record-type')
+ if record_type != 'shard':
+ err = 'unexpected record type %r' % record_type
+ self.app.logger.error("Failed to get shard ranges from %s: %s",
+ req.path_qs, err)
+ return None
+
+ try:
+ return [ShardRange.from_dict(shard_range)
+ for shard_range in listing]
+ except (ValueError, TypeError, KeyError) as err:
+ self.app.logger.error(
+ "Failed to get shard ranges from %s: invalid data: %r",
+ req.path_qs, err)
+ return None
diff --git a/swift/proxy/controllers/container.py b/swift/proxy/controllers/container.py
index 15c67858ea..e90632a294 100644
--- a/swift/proxy/controllers/container.py
+++ b/swift/proxy/controllers/container.py
@@ -14,11 +14,14 @@
# limitations under the License.
from swift import gettext_ as _
+import json
from six.moves.urllib.parse import unquote
-from swift.common.utils import public, csv_append, Timestamp
-from swift.common.constraints import check_metadata
+from swift.common.utils import public, csv_append, Timestamp, \
+ config_true_value, ShardRange
+from swift.common.constraints import check_metadata, CONTAINER_LISTING_LIMIT
from swift.common.http import HTTP_ACCEPTED, is_success
+from swift.common.request_helpers import get_sys_meta_prefix
from swift.proxy.controllers.base import Controller, delay_denial, \
cors_validation, set_info_cache, clear_info_cache
from swift.common.storage_policy import POLICIES
@@ -84,7 +87,9 @@ class ContainerController(Controller):
def GETorHEAD(self, req):
"""Handler for HTTP GET/HEAD requests."""
ai = self.account_info(self.account_name, req)
- if not ai[1]:
+ auto_account = self.account_name.startswith(
+ self.app.auto_create_account_prefix)
+ if not (auto_account or ai[1]):
if 'swift.authorize' in req.environ:
aresp = req.environ['swift.authorize'](req)
if aresp:
@@ -101,10 +106,20 @@ class ContainerController(Controller):
node_iter = self.app.iter_nodes(self.app.container_ring, part)
params = req.params
params['format'] = 'json'
+ record_type = req.headers.get('X-Backend-Record-Type', '').lower()
+ if not record_type:
+ record_type = 'auto'
+ req.headers['X-Backend-Record-Type'] = 'auto'
+ params['states'] = 'listing'
req.params = params
resp = self.GETorHEAD_base(
req, _('Container'), node_iter, part,
req.swift_entity_path, concurrency)
+ resp_record_type = resp.headers.get('X-Backend-Record-Type', '')
+ if all((req.method == "GET", record_type == 'auto',
+ resp_record_type.lower() == 'shard')):
+ resp = self._get_from_shards(req, resp)
+
# Cache this. We just made a request to a storage node and got
# up-to-date information for the container.
resp.headers['X-Backend-Recheck-Container-Existence'] = str(
@@ -122,6 +137,104 @@ class ContainerController(Controller):
for key in self.app.swift_owner_headers:
if key in resp.headers:
del resp.headers[key]
+ # Expose sharding state in reseller requests
+ if req.environ.get('reseller_request', False):
+ resp.headers['X-Container-Sharding'] = config_true_value(
+ resp.headers.get(get_sys_meta_prefix('container') + 'Sharding',
+ 'False'))
+ return resp
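+
+ # Roughly, the reseller-only sharding flag round-trips as follows: a PUT
+ # or POST carrying 'X-Container-Sharding: on' from a reseller request is
+ # stored as container sysmeta ('X-Container-Sysmeta-Sharding: True'),
+ # and a later GET or HEAD from a reseller request reports it back here
+ # as 'X-Container-Sharding: True'.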
+
+ def _get_from_shards(self, req, resp):
+ # construct listing using shards described by the response body
+ shard_ranges = [ShardRange.from_dict(data)
+ for data in json.loads(resp.body)]
+ self.app.logger.debug('GET listing from %s shards for: %s',
+ len(shard_ranges), req.path_qs)
+ if not shard_ranges:
+ # can't find ranges or there was a problem getting the ranges. So
+ # return what we have.
+ return resp
+
+ objects = []
+ req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT))
+ params = req.params.copy()
+ params.pop('states', None)
+ req.headers.pop('X-Backend-Record-Type', None)
+ reverse = config_true_value(params.get('reverse'))
+ marker = params.get('marker')
+ end_marker = params.get('end_marker')
+
+ limit = req_limit
+ for shard_range in shard_ranges:
+ params['limit'] = limit
+ # Always set marker to ensure that object names less than or equal
+ # to those already in the listing are not fetched
+ if objects:
+ last_name = objects[-1].get('name',
+ objects[-1].get('subdir', u''))
+ params['marker'] = last_name.encode('utf-8')
+ elif reverse and marker and marker > shard_range.lower:
+ params['marker'] = marker
+ elif marker and marker <= shard_range.upper:
+ params['marker'] = marker
+ else:
+ params['marker'] = shard_range.upper_str if reverse \
+ else shard_range.lower_str
+ if params['marker'] and reverse:
+ params['marker'] += '\x00'
+
+ # Always set end_marker to ensure that misplaced objects beyond
+ # the expected shard range are not fetched
+ if end_marker and end_marker in shard_range:
+ params['end_marker'] = end_marker
+ else:
+ params['end_marker'] = shard_range.lower_str if reverse \
+ else shard_range.upper_str
+ if params['end_marker'] and not reverse:
+ params['end_marker'] += '\x00'
+
+ if (shard_range.account == self.account_name and
+ shard_range.container == self.container_name):
+ # directed back to same container - force GET of objects
+ headers = {'X-Backend-Record-Type': 'object'}
+ else:
+ headers = None
+ self.app.logger.debug('Getting from %s %s with %s',
+ shard_range, shard_range.name, headers)
+ objs, shard_resp = self._get_container_listing(
+ req, shard_range.account, shard_range.container,
+ headers=headers, params=params)
+
+ if not objs:
+ # tolerate errors or empty shard containers
+ continue
+
+ objects.extend(objs)
+ limit -= len(objs)
+
+ if limit <= 0:
+ break
+ elif (end_marker and reverse and
+ end_marker >= objects[-1]['name'].encode('utf-8')):
+ break
+ elif (end_marker and not reverse and
+ end_marker <= objects[-1]['name'].encode('utf-8')):
+ break
+
+ resp.body = json.dumps(objects)
+ constrained = any(req.params.get(constraint) for constraint in (
+ 'marker', 'end_marker', 'path', 'prefix', 'delimiter'))
+ if not constrained and len(objects) < req_limit:
+ self.app.logger.debug('Setting object count to %s' % len(objects))
+ # prefer the actual listing stats over the potentially outdated
+ # root stats. This condition is only likely when a sharded
+ # container is shrinking or in tests; typically a sharded container
+ # will have more than CONTAINER_LISTING_LIMIT objects so any
+ # unconstrained listing will be capped by the limit and total
+ # object stats cannot therefore be inferred from the listing.
+ resp.headers['X-Container-Object-Count'] = len(objects)
+ resp.headers['X-Container-Bytes-Used'] = sum(
+ [o['bytes'] for o in objects])
return resp
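+
+ # A worked example of the marker/end_marker handling in _get_from_shards
+ # above, assuming a forward (non-reversed) listing and a shard range
+ # covering names in ('d', 'h']:
+ #
+ # * once some objects have been listed, marker is the last name already
+ #   in the listing, so nothing at or before it is fetched again;
+ # * otherwise, a client-supplied marker such as 'e' is passed through
+ #   because it sorts at or below the range's upper bound;
+ # * failing that, marker falls back to the range's lower bound 'd';
+ # * end_marker is the client's end_marker if it falls within the range,
+ #   else 'h\x00', so that the (exclusive) end_marker still admits names
+ #   equal to the range's (inclusive) upper bound while excluding any
+ #   misplaced objects beyond it.
+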
@public
@@ -150,6 +263,10 @@ class ContainerController(Controller):
if not req.environ.get('swift_owner'):
for key in self.app.swift_owner_headers:
req.headers.pop(key, None)
+ if req.environ.get('reseller_request', False) and \
+ 'X-Container-Sharding' in req.headers:
+ req.headers[get_sys_meta_prefix('container') + 'Sharding'] = \
+ str(config_true_value(req.headers['X-Container-Sharding']))
length_limit = self.get_name_length_limit()
if len(self.container_name) > length_limit:
resp = HTTPBadRequest(request=req)
@@ -198,6 +315,10 @@ class ContainerController(Controller):
if not req.environ.get('swift_owner'):
for key in self.app.swift_owner_headers:
req.headers.pop(key, None)
+ if req.environ.get('reseller_request', False) and \
+ 'X-Container-Sharding' in req.headers:
+ req.headers[get_sys_meta_prefix('container') + 'Sharding'] = \
+ str(config_true_value(req.headers['X-Container-Sharding']))
account_partition, accounts, container_count = \
self.account_info(self.account_name, req)
if not accounts:
diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py
index d8aadf7935..7a41ef3c53 100644
--- a/swift/proxy/controllers/obj.py
+++ b/swift/proxy/controllers/obj.py
@@ -266,6 +266,20 @@ class BaseObjectController(Controller):
"""Handler for HTTP HEAD requests."""
return self.GETorHEAD(req)
+ def _get_update_target(self, req, container_info):
+ # find the sharded container to which we'll send the update
+ db_state = container_info.get('sharding_state', 'unsharded')
+ if db_state in ('sharded', 'sharding'):
+ shard_ranges = self._get_shard_ranges(
+ req, self.account_name, self.container_name,
+ includes=self.object_name, states='updating')
+ if shard_ranges:
+ partition, nodes = self.app.container_ring.get_nodes(
+ shard_ranges[0].account, shard_ranges[0].container)
+ return partition, nodes, shard_ranges[0].name
+
+ return container_info['partition'], container_info['nodes'], None
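+
+ # Sketch of the flow: when the root database reports a 'sharding' or
+ # 'sharded' state, the updating shard range for the object name is
+ # looked up and its name (an '<account>/<container>' path) is returned
+ # as the third element; _backend_requests() then forwards it to the
+ # object servers as an 'X-Backend-Container-Path' header so container
+ # updates are sent to the shard rather than the root. If no shard range
+ # is found, updates fall back to the root container's partition/nodes.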
+
@public
@cors_validation
@delay_denial
@@ -273,8 +287,8 @@ class BaseObjectController(Controller):
"""HTTP POST request handler."""
container_info = self.container_info(
self.account_name, self.container_name, req)
- container_partition = container_info['partition']
- container_nodes = container_info['nodes']
+ container_partition, container_nodes, container_path = \
+ self._get_update_target(req, container_info)
req.acl = container_info['write_acl']
if 'swift.authorize' in req.environ:
aresp = req.environ['swift.authorize'](req)
@@ -304,13 +318,14 @@ class BaseObjectController(Controller):
headers = self._backend_requests(
req, len(nodes), container_partition, container_nodes,
- delete_at_container, delete_at_part, delete_at_nodes)
+ delete_at_container, delete_at_part, delete_at_nodes,
+ container_path=container_path)
return self._post_object(req, obj_ring, partition, headers)
def _backend_requests(self, req, n_outgoing,
container_partition, containers,
delete_at_container=None, delete_at_partition=None,
- delete_at_nodes=None):
+ delete_at_nodes=None, container_path=None):
policy_index = req.headers['X-Backend-Storage-Policy-Index']
policy = POLICIES.get_by_index(policy_index)
headers = [self.generate_request_headers(req, additional=req.headers)
@@ -324,6 +339,8 @@ class BaseObjectController(Controller):
headers[index]['X-Container-Device'] = csv_append(
headers[index].get('X-Container-Device'),
container['device'])
+ if container_path:
+ headers[index]['X-Backend-Container-Path'] = container_path
def set_delete_at_headers(index, delete_at_node):
headers[index]['X-Delete-At-Container'] = delete_at_container
@@ -752,8 +769,8 @@ class BaseObjectController(Controller):
policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
container_info['storage_policy'])
obj_ring = self.app.get_object_ring(policy_index)
- container_nodes = container_info['nodes']
- container_partition = container_info['partition']
+ container_partition, container_nodes, container_path = \
+ self._get_update_target(req, container_info)
partition, nodes = obj_ring.get_nodes(
self.account_name, self.container_name, self.object_name)
@@ -800,7 +817,8 @@ class BaseObjectController(Controller):
# add special headers to be handled by storage nodes
outgoing_headers = self._backend_requests(
req, len(nodes), container_partition, container_nodes,
- delete_at_container, delete_at_part, delete_at_nodes)
+ delete_at_container, delete_at_part, delete_at_nodes,
+ container_path=container_path)
# send object to storage nodes
resp = self._store_object(
@@ -823,8 +841,8 @@ class BaseObjectController(Controller):
next_part_power = getattr(obj_ring, 'next_part_power', None)
if next_part_power:
req.headers['X-Backend-Next-Part-Power'] = next_part_power
- container_partition = container_info['partition']
- container_nodes = container_info['nodes']
+ container_partition, container_nodes, container_path = \
+ self._get_update_target(req, container_info)
req.acl = container_info['write_acl']
req.environ['swift_sync_key'] = container_info['sync_key']
if 'swift.authorize' in req.environ:
@@ -851,7 +869,8 @@ class BaseObjectController(Controller):
node_count += local_handoffs
headers = self._backend_requests(
- req, node_count, container_partition, container_nodes)
+ req, node_count, container_partition, container_nodes,
+ container_path=container_path)
return self._delete_object(req, obj_ring, partition, headers)
diff --git a/test/__init__.py b/test/__init__.py
index 1a56597158..51e3aa9d82 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -17,7 +17,11 @@
# The code below enables nosetests to work with i18n _() blocks
from __future__ import print_function
import sys
+from contextlib import contextmanager
+
import os
+from six import reraise
+
try:
from unittest.util import safe_repr
except ImportError:
@@ -86,3 +90,26 @@ def listen_zero():
sock.bind(("127.0.0.1", 0))
sock.listen(50)
return sock
+
+
+@contextmanager
+def annotate_failure(msg):
+ """
+ Catch AssertionError and annotate it with a message. Useful when making
+ assertions in a loop where the message can indicate the loop index or
+ richer context about the failure.
+
+ :param msg: A message to be prefixed to the AssertionError message.
+ """
+ try:
+ yield
+ except AssertionError as err:
+ err_typ, err_val, err_tb = sys.exc_info()
+ if err_val.args:
+ msg = '%s Failed with %s' % (msg, err_val.args[0])
+ err_val.args = (msg, ) + err_val.args[1:]
+ else:
+ # workaround for some IDE's raising custom AssertionErrors
+ err_val = '%s Failed with %s' % (msg, err)
+ err_typ = AssertionError
+ reraise(err_typ, err_val, err_tb)
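+
+
+# A minimal usage sketch (names here are illustrative): wrap assertions made
+# in a loop so that a failure reports which iteration broke, e.g.
+#
+#   for i, broker in enumerate(brokers):
+#       with annotate_failure('node %d. ' % i):
+#           self.assertEqual('sharded', broker.get_db_state())
+#
+# An AssertionError raised inside the block is re-raised with its message
+# prefixed by 'node <i>. Failed with ...'.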
diff --git a/test/probe/brain.py b/test/probe/brain.py
index 843754210e..fd597cf6b3 100644
--- a/test/probe/brain.py
+++ b/test/probe/brain.py
@@ -99,9 +99,11 @@ class BrainSplitter(object):
raise ValueError('Unknown server_type: %r' % server_type)
self.server_type = server_type
- part, nodes = self.ring.get_nodes(self.account, c, o)
+ self.part, self.nodes = self.ring.get_nodes(self.account, c, o)
+
+ node_ids = [n['id'] for n in self.nodes]
+ self.node_numbers = [n + 1 for n in node_ids]
- node_ids = [n['id'] for n in nodes]
if all(n_id in node_ids for n_id in (0, 1)):
self.primary_numbers = (1, 2)
self.handoff_numbers = (3, 4)
diff --git a/test/probe/common.py b/test/probe/common.py
index ccb5751f26..5622d71b64 100644
--- a/test/probe/common.py
+++ b/test/probe/common.py
@@ -14,6 +14,8 @@
# limitations under the License.
from __future__ import print_function
+
+import errno
import os
from subprocess import Popen, PIPE
import sys
@@ -125,13 +127,17 @@ def kill_server(ipport, ipport2server):
if err:
raise Exception('unable to kill %s' % (server if not number else
'%s%s' % (server, number)))
+ return wait_for_server_to_hangup(ipport)
+
+
+def wait_for_server_to_hangup(ipport):
try_until = time() + 30
while True:
try:
conn = HTTPConnection(*ipport)
conn.request('GET', '/')
conn.getresponse()
- except Exception as err:
+ except Exception:
break
if time() > try_until:
raise Exception(
@@ -334,33 +340,35 @@ class ProbeTest(unittest.TestCase):
Don't instantiate this directly, use a child class instead.
"""
+ def _load_rings_and_configs(self):
+ self.ipport2server = {}
+ self.configs = defaultdict(dict)
+ self.account_ring = get_ring(
+ 'account',
+ self.acct_cont_required_replicas,
+ self.acct_cont_required_devices,
+ ipport2server=self.ipport2server,
+ config_paths=self.configs)
+ self.container_ring = get_ring(
+ 'container',
+ self.acct_cont_required_replicas,
+ self.acct_cont_required_devices,
+ ipport2server=self.ipport2server,
+ config_paths=self.configs)
+ self.policy = get_policy(**self.policy_requirements)
+ self.object_ring = get_ring(
+ self.policy.ring_name,
+ self.obj_required_replicas,
+ self.obj_required_devices,
+ server='object',
+ ipport2server=self.ipport2server,
+ config_paths=self.configs)
+
def setUp(self):
resetswift()
kill_orphans()
+ self._load_rings_and_configs()
try:
- self.ipport2server = {}
- self.configs = defaultdict(dict)
- self.account_ring = get_ring(
- 'account',
- self.acct_cont_required_replicas,
- self.acct_cont_required_devices,
- ipport2server=self.ipport2server,
- config_paths=self.configs)
- self.container_ring = get_ring(
- 'container',
- self.acct_cont_required_replicas,
- self.acct_cont_required_devices,
- ipport2server=self.ipport2server,
- config_paths=self.configs)
- self.policy = get_policy(**self.policy_requirements)
- self.object_ring = get_ring(
- self.policy.ring_name,
- self.obj_required_replicas,
- self.obj_required_devices,
- server='object',
- ipport2server=self.ipport2server,
- config_paths=self.configs)
-
self.servers_per_port = any(
int(readconf(c, section_name='object-replicator').get(
'servers_per_port', '0'))
@@ -489,6 +497,49 @@ class ProbeTest(unittest.TestCase):
finally:
shutil.rmtree(tempdir)
+ def get_all_object_nodes(self):
+ """
+ Returns a list of all nodes in all object storage policies.
+
+ :return: a list of node dicts.
+ """
+ all_obj_nodes = {}
+ for policy in ENABLED_POLICIES:
+ for dev in policy.object_ring.devs:
+ all_obj_nodes[dev['device']] = dev
+ return all_obj_nodes.values()
+
+ def gather_async_pendings(self, onodes):
+ """
+ Returns a list of paths to async pending files found on given nodes.
+
+ :param onodes: a list of nodes.
+ :return: a list of file paths.
+ """
+ async_pendings = []
+ for onode in onodes:
+ device_dir = self.device_dir('', onode)
+ for ap_pol_dir in os.listdir(device_dir):
+ if not ap_pol_dir.startswith('async_pending'):
+ # skip 'objects', 'containers', etc.
+ continue
+ async_pending_dir = os.path.join(device_dir, ap_pol_dir)
+ try:
+ ap_dirs = os.listdir(async_pending_dir)
+ except OSError as err:
+ if err.errno == errno.ENOENT:
+ pass
+ else:
+ raise
+ else:
+ for ap_dir in ap_dirs:
+ ap_dir_fullpath = os.path.join(
+ async_pending_dir, ap_dir)
+ async_pendings.extend([
+ os.path.join(ap_dir_fullpath, ent)
+ for ent in os.listdir(ap_dir_fullpath)])
+ return async_pendings
+
class ReplProbeTest(ProbeTest):
diff --git a/test/probe/test_object_expirer.py b/test/probe/test_object_expirer.py
index 92642f19d6..ad31662730 100644
--- a/test/probe/test_object_expirer.py
+++ b/test/probe/test_object_expirer.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import errno
-import os
import random
import time
import uuid
@@ -143,31 +141,6 @@ class TestObjectExpirer(ReplProbeTest):
# that the object server does not write out any async pendings; this
# test asserts that this is the case.
- def gather_async_pendings(onodes):
- async_pendings = []
- for onode in onodes:
- device_dir = self.device_dir('', onode)
- for ap_pol_dir in os.listdir(device_dir):
- if not ap_pol_dir.startswith('async_pending'):
- # skip 'objects', 'containers', etc.
- continue
- async_pending_dir = os.path.join(device_dir, ap_pol_dir)
- try:
- ap_dirs = os.listdir(async_pending_dir)
- except OSError as err:
- if err.errno == errno.ENOENT:
- pass
- else:
- raise
- else:
- for ap_dir in ap_dirs:
- ap_dir_fullpath = os.path.join(
- async_pending_dir, ap_dir)
- async_pendings.extend([
- os.path.join(ap_dir_fullpath, ent)
- for ent in os.listdir(ap_dir_fullpath)])
- return async_pendings
-
# Make an expiring object in each policy
for policy in ENABLED_POLICIES:
container_name = "expirer-test-%d" % policy.idx
@@ -191,15 +164,12 @@ class TestObjectExpirer(ReplProbeTest):
# Make sure there's no async_pendings anywhere. Probe tests only run
# on single-node installs anyway, so this set should be small enough
# that an exhaustive check doesn't take too long.
- all_obj_nodes = {}
- for policy in ENABLED_POLICIES:
- for dev in policy.object_ring.devs:
- all_obj_nodes[dev['device']] = dev
- pendings_before = gather_async_pendings(all_obj_nodes.values())
+ all_obj_nodes = self.get_all_object_nodes()
+ pendings_before = self.gather_async_pendings(all_obj_nodes)
# expire the objects
Manager(['object-expirer']).once()
- pendings_after = gather_async_pendings(all_obj_nodes.values())
+ pendings_after = self.gather_async_pendings(all_obj_nodes)
self.assertEqual(pendings_after, pendings_before)
def test_expirer_object_should_not_be_expired(self):
diff --git a/test/probe/test_sharder.py b/test/probe/test_sharder.py
new file mode 100644
index 0000000000..77ee3dd35b
--- /dev/null
+++ b/test/probe/test_sharder.py
@@ -0,0 +1,2025 @@
+# Copyright (c) 2017 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import hashlib
+import json
+import os
+import shutil
+import uuid
+
+from nose import SkipTest
+
+from swift.common import direct_client
+from swift.common.direct_client import DirectClientException
+from swift.common.utils import ShardRange, parse_db_filename, get_db_files, \
+ quorum_size, config_true_value, Timestamp
+from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING
+from swift.common import utils
+from swift.common.manager import Manager
+from swiftclient import client, get_auth, ClientException
+
+from swift.proxy.controllers.obj import num_container_updates
+from test import annotate_failure
+from test.probe.brain import BrainSplitter
+from test.probe.common import ReplProbeTest, get_server_number, \
+ wait_for_server_to_hangup
+
+
+MIN_SHARD_CONTAINER_THRESHOLD = 4
+MAX_SHARD_CONTAINER_THRESHOLD = 100
+
+
+class ShardCollector(object):
+ """
+ Collects the (headers, shard ranges) tuple returned from each node,
+ keyed by the node's id, in self.ranges.
+ """
+ def __init__(self):
+ self.ranges = {}
+
+ def __call__(self, cnode, cpart, account, container):
+ self.ranges[cnode['id']] = direct_client.direct_get_container(
+ cnode, cpart, account, container,
+ headers={'X-Backend-Record-Type': 'shard'})
+
+
+class BaseTestContainerSharding(ReplProbeTest):
+
+ def _maybe_skip_test(self):
+ try:
+ cont_configs = [utils.readconf(p, 'container-sharder')
+ for p in self.configs['container-server'].values()]
+ except ValueError:
+ raise SkipTest('No [container-sharder] section found in '
+ 'container-server configs')
+
+ skip_reasons = []
+ auto_shard = all([config_true_value(c.get('auto_shard', False))
+ for c in cont_configs])
+ if not auto_shard:
+ skip_reasons.append(
+ 'auto_shard must be true in all container_sharder configs')
+
+ self.max_shard_size = max(
+ int(c.get('shard_container_threshold', '1000000'))
+ for c in cont_configs)
+
+ if not (MIN_SHARD_CONTAINER_THRESHOLD <= self.max_shard_size
+ <= MAX_SHARD_CONTAINER_THRESHOLD):
+ skip_reasons.append(
+ 'shard_container_threshold %d must be between %d and %d' %
+ (self.max_shard_size, MIN_SHARD_CONTAINER_THRESHOLD,
+ MAX_SHARD_CONTAINER_THRESHOLD))
+
+ def skip_check(reason_list, option, required):
+ values = set([int(c.get(option, required)) for c in cont_configs])
+ if values != {required}:
+ reason_list.append('%s must be %s' % (option, required))
+
+ skip_check(skip_reasons, 'shard_scanner_batch_size', 10)
+ skip_check(skip_reasons, 'cleave_batch_size', 2)
+
+ if skip_reasons:
+ raise SkipTest(', '.join(skip_reasons))
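+
+ # For example, container-server configs whose [container-sharder]
+ # sections set auto_shard = true, a shard_container_threshold anywhere
+ # between 4 and 100, shard_scanner_batch_size = 10 and
+ # cleave_batch_size = 2 pass all of the checks above; anything else
+ # causes these probe tests to be skipped rather than run against an
+ # unsuitable configuration.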
+
+ def _load_rings_and_configs(self):
+ super(BaseTestContainerSharding, self)._load_rings_and_configs()
+ # perform checks for skipping test before starting services
+ self._maybe_skip_test()
+
+ def _make_object_names(self, number):
+ return ['obj-%04d' % x for x in range(number)]
+
+ def _setup_container_name(self):
+ self.container_name = 'container-%s' % uuid.uuid4()
+
+ def setUp(self):
+ client.logger.setLevel(client.logging.WARNING)
+ client.requests.logging.getLogger().setLevel(
+ client.requests.logging.WARNING)
+ super(BaseTestContainerSharding, self).setUp()
+ _, self.admin_token = get_auth(
+ 'http://127.0.0.1:8080/auth/v1.0', 'admin:admin', 'admin')
+ self._setup_container_name()
+ self.brain = BrainSplitter(self.url, self.token, self.container_name,
+ None, 'container')
+ self.brain.put_container(policy_index=int(self.policy))
+ self.sharders = Manager(['container-sharder'])
+ self.internal_client = self.make_internal_client()
+
+ def stop_container_servers(self, node_numbers=None):
+ if node_numbers:
+ ipports = []
+ server2ipport = {v: k for k, v in self.ipport2server.items()}
+ for number in self.brain.node_numbers[node_numbers]:
+ self.brain.servers.stop(number=number)
+ server = 'container%d' % number
+ ipports.append(server2ipport[server])
+ else:
+ ipports = [k for k, v in self.ipport2server.items()
+ if v.startswith('container')]
+ self.brain.servers.stop()
+ for ipport in ipports:
+ wait_for_server_to_hangup(ipport)
+
+ def put_objects(self, obj_names):
+ for obj in obj_names:
+ client.put_object(self.url, self.token, self.container_name, obj)
+
+ def delete_objects(self, obj_names):
+ for obj in obj_names:
+ client.delete_object(
+ self.url, self.token, self.container_name, obj)
+
+ def get_container_shard_ranges(self, account=None, container=None):
+ account = account if account else self.account
+ container = container if container else self.container_name
+ path = self.internal_client.make_path(account, container)
+ resp = self.internal_client.make_request(
+ 'GET', path + '?format=json', {'X-Backend-Record-Type': 'shard'},
+ [200])
+ return [ShardRange.from_dict(sr) for sr in json.loads(resp.body)]
+
+ def direct_container_op(self, func, account=None, container=None,
+ expect_failure=False):
+ account = account if account else self.account
+ container = container if container else self.container_name
+ cpart, cnodes = self.container_ring.get_nodes(account, container)
+ unexpected_responses = []
+ results = {}
+ for cnode in cnodes:
+ try:
+ results[cnode['id']] = func(cnode, cpart, account, container)
+ except DirectClientException as err:
+ if not expect_failure:
+ unexpected_responses.append((cnode, err))
+ else:
+ if expect_failure:
+ unexpected_responses.append((cnode, 'success'))
+ if unexpected_responses:
+ self.fail('Unexpected responses: %s' % unexpected_responses)
+ return results
+
+ def direct_get_container_shard_ranges(self, account=None, container=None,
+ expect_failure=False):
+ collector = ShardCollector()
+ self.direct_container_op(
+ collector, account, container, expect_failure)
+ return collector.ranges
+
+ def direct_delete_container(self, account=None, container=None,
+ expect_failure=False):
+ self.direct_container_op(direct_client.direct_delete_container,
+ account, container, expect_failure)
+
+ def direct_head_container(self, account=None, container=None,
+ expect_failure=False):
+ return self.direct_container_op(direct_client.direct_head_container,
+ account, container, expect_failure)
+
+ def get_storage_dir(self, part, node, account=None, container=None):
+ account = account or self.brain.account
+ container = container or self.container_name
+ server_type, config_number = get_server_number(
+ (node['ip'], node['port']), self.ipport2server)
+ assert server_type == 'container'
+ repl_server = '%s-replicator' % server_type
+ conf = utils.readconf(self.configs[repl_server][config_number],
+ section_name=repl_server)
+ datadir = os.path.join(conf['devices'], node['device'], 'containers')
+ container_hash = utils.hash_path(account, container)
+ return (utils.storage_directory(datadir, part, container_hash),
+ container_hash)
+
+ def get_broker(self, part, node, account=None, container=None):
+ container_dir, container_hash = self.get_storage_dir(
+ part, node, account=account, container=container)
+ db_file = os.path.join(container_dir, container_hash + '.db')
+ self.assertTrue(get_db_files(db_file)) # sanity check
+ return ContainerBroker(db_file)
+
+ def categorize_container_dir_content(self, account=None, container=None):
+ account = account or self.brain.account
+ container = container or self.container_name
+ part, nodes = self.brain.ring.get_nodes(account, container)
+ storage_dirs = [
+ self.get_storage_dir(part, node, account=account,
+ container=container)[0]
+ for node in nodes]
+ result = {
+ 'shard_dbs': [],
+ 'normal_dbs': [],
+ 'pendings': [],
+ 'locks': [],
+ 'other': [],
+ }
+ for storage_dir in storage_dirs:
+ for f in os.listdir(storage_dir):
+ path = os.path.join(storage_dir, f)
+ if path.endswith('.db'):
+ hash_, epoch, ext = parse_db_filename(path)
+ if epoch:
+ result['shard_dbs'].append(path)
+ else:
+ result['normal_dbs'].append(path)
+ elif path.endswith('.db.pending'):
+ result['pendings'].append(path)
+ elif path.endswith('/.lock'):
+ result['locks'].append(path)
+ else:
+ result['other'].append(path)
+ if result['other']:
+ self.fail('Found unexpected files in storage directory:\n %s' %
+ '\n '.join(result['other']))
+ return result
+
+ def assertLengthEqual(self, obj, length):
+ obj_len = len(obj)
+ self.assertEqual(obj_len, length, 'len(%r) == %d, not %d' % (
+ obj, obj_len, length))
+
+ def assert_dict_contains(self, expected_items, actual_dict):
+ ignored = set(expected_items) ^ set(actual_dict)
+ filtered_actual = dict((k, actual_dict[k])
+ for k in actual_dict if k not in ignored)
+ self.assertEqual(expected_items, filtered_actual)
+
+ def assert_shard_ranges_contiguous(self, expected_number, shard_ranges,
+ first_lower='', last_upper=''):
+ if shard_ranges and isinstance(shard_ranges[0], ShardRange):
+ actual_shard_ranges = sorted(shard_ranges)
+ else:
+ actual_shard_ranges = sorted([ShardRange.from_dict(d)
+ for d in shard_ranges])
+ self.assertLengthEqual(actual_shard_ranges, expected_number)
+ if expected_number:
+ with annotate_failure('Ranges %s.' % actual_shard_ranges):
+ self.assertEqual(first_lower, actual_shard_ranges[0].lower_str)
+ for x, y in zip(actual_shard_ranges, actual_shard_ranges[1:]):
+ self.assertEqual(x.upper, y.lower)
+ self.assertEqual(last_upper, actual_shard_ranges[-1].upper_str)
+
+ def assert_shard_range_equal(self, expected, actual, excludes=None):
+ excludes = excludes or []
+ expected_dict = dict(expected)
+ actual_dict = dict(actual)
+ for k in excludes:
+ expected_dict.pop(k, None)
+ actual_dict.pop(k, None)
+ self.assertEqual(expected_dict, actual_dict)
+
+ def assert_shard_range_lists_equal(self, expected, actual, excludes=None):
+ self.assertEqual(len(expected), len(actual))
+ for expected, actual in zip(expected, actual):
+ self.assert_shard_range_equal(expected, actual, excludes=excludes)
+
+ def assert_shard_range_state(self, expected_state, shard_ranges):
+ if shard_ranges and not isinstance(shard_ranges[0], ShardRange):
+ shard_ranges = [ShardRange.from_dict(data)
+ for data in shard_ranges]
+ self.assertEqual([expected_state] * len(shard_ranges),
+ [sr.state for sr in shard_ranges])
+
+ def assert_total_object_count(self, expected_object_count, shard_ranges):
+ actual = sum([sr['object_count'] for sr in shard_ranges])
+ self.assertEqual(expected_object_count, actual)
+
+ def assert_container_listing(self, expected_listing):
+ headers, actual_listing = client.get_container(
+ self.url, self.token, self.container_name)
+ self.assertIn('x-container-object-count', headers)
+ expected_obj_count = len(expected_listing)
+ self.assertEqual(expected_listing, [
+ x['name'].encode('utf-8') for x in actual_listing])
+ self.assertEqual(str(expected_obj_count),
+ headers['x-container-object-count'])
+ return headers, actual_listing
+
+ def assert_container_object_count(self, expected_obj_count):
+ headers = client.head_container(
+ self.url, self.token, self.container_name)
+ self.assertIn('x-container-object-count', headers)
+ self.assertEqual(str(expected_obj_count),
+ headers['x-container-object-count'])
+
+ def assert_container_post_ok(self, meta_value):
+ key = 'X-Container-Meta-Assert-Post-Works'
+ headers = {key: meta_value}
+ client.post_container(
+ self.url, self.token, self.container_name, headers=headers)
+ resp_headers = client.head_container(
+ self.url, self.token, self.container_name)
+ self.assertEqual(meta_value, resp_headers.get(key.lower()))
+
+ def assert_container_post_fails(self, meta_value):
+ key = 'X-Container-Meta-Assert-Post-Works'
+ headers = {key: meta_value}
+ with self.assertRaises(ClientException) as cm:
+ client.post_container(
+ self.url, self.token, self.container_name, headers=headers)
+ self.assertEqual(404, cm.exception.http_status)
+
+ def assert_container_delete_fails(self):
+ with self.assertRaises(ClientException) as cm:
+ client.delete_container(self.url, self.token, self.container_name)
+ self.assertEqual(409, cm.exception.http_status)
+
+ def assert_container_not_found(self):
+ with self.assertRaises(ClientException) as cm:
+ client.get_container(self.url, self.token, self.container_name)
+ self.assertEqual(404, cm.exception.http_status)
+ # check for headers leaking out while deleted
+ resp_headers = cm.exception.http_response_headers
+ self.assertNotIn('X-Container-Object-Count', resp_headers)
+ self.assertNotIn('X-Container-Bytes-Used', resp_headers)
+ self.assertNotIn('X-Timestamp', resp_headers)
+ self.assertNotIn('X-PUT-Timestamp', resp_headers)
+
+ def assert_container_has_shard_sysmeta(self):
+ node_headers = self.direct_head_container()
+ for node_id, headers in node_headers.items():
+ with annotate_failure('%s in %s' % (node_id, node_headers.keys())):
+ for k, v in headers.items():
+ if k.lower().startswith('x-container-sysmeta-shard'):
+ break
+ else:
+ self.fail('No shard sysmeta found in %s' % headers)
+
+ def assert_container_state(self, node, expected_state, num_shard_ranges):
+ headers, shard_ranges = direct_client.direct_get_container(
+ node, self.brain.part, self.account, self.container_name,
+ headers={'X-Backend-Record-Type': 'shard'})
+ self.assertEqual(num_shard_ranges, len(shard_ranges))
+ self.assertIn('X-Backend-Sharding-State', headers)
+ self.assertEqual(
+ expected_state, headers['X-Backend-Sharding-State'])
+ return [ShardRange.from_dict(sr) for sr in shard_ranges]
+
+ def get_part_and_node_numbers(self, shard_range):
+ """Return the partition and node numbers for a shard range."""
+ part, nodes = self.brain.ring.get_nodes(
+ shard_range.account, shard_range.container)
+ return part, [n['id'] + 1 for n in nodes]
+
+ def run_sharders(self, shard_ranges):
+ """Run the sharder on partitions for given shard ranges."""
+ if not isinstance(shard_ranges, (list, tuple, set)):
+ shard_ranges = (shard_ranges,)
+ partitions = ','.join(str(self.get_part_and_node_numbers(sr)[0])
+ for sr in shard_ranges)
+ self.sharders.once(additional_args='--partitions=%s' % partitions)
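+
+ # For instance (partition numbers are illustrative), if the given shard
+ # ranges map to container partitions 291 and 104, this results in a
+ # single sharder pass with additional_args='--partitions=291,104',
+ # i.e. only those partitions are processed.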
+
+ def run_sharder_sequentially(self, shard_range=None):
+ """Run sharder node by node on partition for given shard range."""
+ if shard_range:
+ part, node_numbers = self.get_part_and_node_numbers(shard_range)
+ else:
+ part, node_numbers = self.brain.part, self.brain.node_numbers
+ for node_number in node_numbers:
+ self.sharders.once(number=node_number,
+ additional_args='--partitions=%s' % part)
+
+
+class TestContainerShardingNonUTF8(BaseTestContainerSharding):
+ def test_sharding_listing(self):
+ # verify parameterised listing of a container during sharding
+ all_obj_names = self._make_object_names(4 * self.max_shard_size)
+ obj_names = all_obj_names[::2]
+ self.put_objects(obj_names)
+ # choose some names approx in middle of each expected shard range
+ markers = [
+ obj_names[i] for i in range(self.max_shard_size // 4,
+ 2 * self.max_shard_size,
+ self.max_shard_size // 2)]
+
+ def check_listing(objects, **params):
+ qs = '&'.join(['%s=%s' % param for param in params.items()])
+ headers, listing = client.get_container(
+ self.url, self.token, self.container_name, query_string=qs)
+ listing = [x['name'].encode('utf-8') for x in listing]
+ if params.get('reverse'):
+ marker = params.get('marker', ShardRange.MAX)
+ end_marker = params.get('end_marker', ShardRange.MIN)
+ expected = [o for o in objects if end_marker < o < marker]
+ expected.reverse()
+ else:
+ marker = params.get('marker', ShardRange.MIN)
+ end_marker = params.get('end_marker', ShardRange.MAX)
+ expected = [o for o in objects if marker < o < end_marker]
+ if 'limit' in params:
+ expected = expected[:params['limit']]
+ self.assertEqual(expected, listing)
+
+ def check_listing_precondition_fails(**params):
+ qs = '&'.join(['%s=%s' % param for param in params.items()])
+ with self.assertRaises(ClientException) as cm:
+ client.get_container(
+ self.url, self.token, self.container_name, query_string=qs)
+ self.assertEqual(412, cm.exception.http_status)
+ return cm.exception
+
+ def do_listing_checks(objects):
+ check_listing(objects)
+ check_listing(objects, marker=markers[0], end_marker=markers[1])
+ check_listing(objects, marker=markers[0], end_marker=markers[2])
+ check_listing(objects, marker=markers[1], end_marker=markers[3])
+ check_listing(objects, marker=markers[1], end_marker=markers[3],
+ limit=self.max_shard_size // 4)
+ check_listing(objects, marker=markers[1], end_marker=markers[2],
+ limit=self.max_shard_size // 2)
+ check_listing(objects, marker=markers[1], end_marker=markers[1])
+ check_listing(objects, reverse=True)
+ check_listing(objects, reverse=True, end_marker=markers[1])
+ check_listing(objects, reverse=True, marker=markers[3],
+ end_marker=markers[1], limit=self.max_shard_size // 4)
+ check_listing(objects, reverse=True, marker=markers[3],
+ end_marker=markers[1], limit=0)
+ check_listing([], marker=markers[0], end_marker=markers[0])
+ check_listing([], marker=markers[0], end_marker=markers[1],
+ reverse=True)
+ check_listing(objects, prefix='obj')
+ check_listing([], prefix='zzz')
+ # delimiter
+ headers, listing = client.get_container(
+ self.url, self.token, self.container_name,
+ query_string='delimiter=-')
+ self.assertEqual([{'subdir': 'obj-'}], listing)
+
+ limit = self.cluster_info['swift']['container_listing_limit']
+ exc = check_listing_precondition_fails(limit=limit + 1)
+ self.assertIn('Maximum limit', exc.http_response_content)
+ exc = check_listing_precondition_fails(delimiter='ab')
+ self.assertIn('Bad delimiter', exc.http_response_content)
+
+ # sanity checks
+ do_listing_checks(obj_names)
+
+ # Shard the container
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ # First run the 'leader' in charge of scanning, which finds all shard
+ # ranges and cleaves first two
+ self.sharders.once(number=self.brain.node_numbers[0],
+ additional_args='--partitions=%s' % self.brain.part)
+ # Then run sharder on other nodes which will also cleave first two
+ # shard ranges
+ for n in self.brain.node_numbers[1:]:
+ self.sharders.once(
+ number=n, additional_args='--partitions=%s' % self.brain.part)
+
+ # sanity check shard range states
+ for node in self.brain.nodes:
+ self.assert_container_state(node, 'sharding', 4)
+ shard_ranges = self.get_container_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 4)
+ self.assert_shard_range_state(ShardRange.CLEAVED, shard_ranges[:2])
+ self.assert_shard_range_state(ShardRange.CREATED, shard_ranges[2:])
+
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta() # confirm no sysmeta deleted
+ self.assert_container_post_ok('sharding')
+ do_listing_checks(obj_names)
+
+ # put some new objects spread through entire namespace
+ new_obj_names = all_obj_names[1::4]
+ self.put_objects(new_obj_names)
+
+ # new objects that fell into the first two cleaved shard ranges are
+ # reported in listing, new objects in the yet-to-be-cleaved shard
+ # ranges are not yet included in listing
+ exp_obj_names = [o for o in obj_names + new_obj_names
+ if o <= shard_ranges[1].upper]
+ exp_obj_names += [o for o in obj_names
+ if o > shard_ranges[1].upper]
+ exp_obj_names.sort()
+ do_listing_checks(exp_obj_names)
+
+ # run all the sharders again and the last two shard ranges get cleaved
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+ for node in self.brain.nodes:
+ self.assert_container_state(node, 'sharded', 4)
+ shard_ranges = self.get_container_shard_ranges()
+ self.assert_shard_range_state(ShardRange.ACTIVE, shard_ranges)
+
+ exp_obj_names = obj_names + new_obj_names
+ exp_obj_names.sort()
+ do_listing_checks(exp_obj_names)
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta()
+ self.assert_container_post_ok('sharded')
+
+ # delete original objects
+ self.delete_objects(obj_names)
+ do_listing_checks(new_obj_names)
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta()
+ self.assert_container_post_ok('sharded')
+
+
+class TestContainerShardingUTF8(TestContainerShardingNonUTF8):
+ def _make_object_names(self, number):
+ # override default with names that include non-ascii chars
+ name_length = self.cluster_info['swift']['max_object_name_length']
+ obj_names = []
+ for x in range(number):
+ name = (u'obj-\u00e4\u00ea\u00ec\u00f2\u00fb-%04d' % x)
+ name = name.encode('utf8').ljust(name_length, 'o')
+ obj_names.append(name)
+ return obj_names
+
+ def _setup_container_name(self):
+ # override default with max length name that includes non-ascii chars
+ super(TestContainerShardingUTF8, self)._setup_container_name()
+ name_length = self.cluster_info['swift']['max_container_name_length']
+ cont_name = self.container_name + u'-\u00e4\u00ea\u00ec\u00f2\u00fb'
+ self.container_name = cont_name.encode('utf8').ljust(name_length, 'x')
+
+
+class TestContainerSharding(BaseTestContainerSharding):
+ def _test_sharded_listing(self, run_replicators=False):
+ obj_names = self._make_object_names(self.max_shard_size)
+ self.put_objects(obj_names)
+
+ # Verify that we start out with normal DBs, no shards
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['normal_dbs'], 3)
+ self.assertLengthEqual(found['shard_dbs'], 0)
+ for db_file in found['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual('unsharded', broker.get_db_state())
+ self.assertLengthEqual(broker.get_shard_ranges(), 0)
+
+ headers, pre_sharding_listing = client.get_container(
+ self.url, self.token, self.container_name)
+ self.assertEqual(obj_names, [x['name'].encode('utf-8')
+ for x in pre_sharding_listing]) # sanity
+
+ # Shard it
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ pre_sharding_headers = client.head_container(
+ self.url, self.admin_token, self.container_name)
+ self.assertEqual('True',
+ pre_sharding_headers.get('x-container-sharding'))
+
+ # Only run the one in charge of scanning
+ self.sharders.once(number=self.brain.node_numbers[0],
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # Verify that we have one sharded db -- though the other normal DBs
+ # received the shard ranges that got defined
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 1)
+ broker = ContainerBroker(found['shard_dbs'][0])
+ # TODO: assert the shard db is on replica 0
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual('sharded', broker.get_db_state())
+ orig_root_shard_ranges = [dict(sr) for sr in broker.get_shard_ranges()]
+ self.assertLengthEqual(orig_root_shard_ranges, 2)
+ self.assert_total_object_count(len(obj_names), orig_root_shard_ranges)
+ self.assert_shard_ranges_contiguous(2, orig_root_shard_ranges)
+ self.assertEqual([ShardRange.ACTIVE, ShardRange.ACTIVE],
+ [sr['state'] for sr in orig_root_shard_ranges])
+ self.direct_delete_container(expect_failure=True)
+
+ self.assertLengthEqual(found['normal_dbs'], 2)
+ for db_file in found['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual('unsharded', broker.get_db_state())
+ # the sharded db had shard range meta_timestamps and state updated
+ # during cleaving, so we do not expect those to be equal on other
+ # nodes
+ self.assert_shard_range_lists_equal(
+ orig_root_shard_ranges, broker.get_shard_ranges(),
+ excludes=['meta_timestamp', 'state', 'state_timestamp'])
+
+ if run_replicators:
+ Manager(['container-replicator']).once()
+ # replication doesn't change the db file names
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 1)
+ self.assertLengthEqual(found['normal_dbs'], 2)
+
+ # Now that everyone has shard ranges, run *everyone*
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+
+ # Verify that we only have shard dbs now
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 3)
+ self.assertLengthEqual(found['normal_dbs'], 0)
+ # Shards stayed the same
+ for db_file in found['shard_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual('sharded', broker.get_db_state())
+ # Well, except for meta_timestamps, since the shards each reported
+ self.assert_shard_range_lists_equal(
+ orig_root_shard_ranges, broker.get_shard_ranges(),
+ excludes=['meta_timestamp', 'state_timestamp'])
+ for orig, updated in zip(orig_root_shard_ranges,
+ broker.get_shard_ranges()):
+ self.assertGreaterEqual(updated.state_timestamp,
+ orig['state_timestamp'])
+ self.assertGreaterEqual(updated.meta_timestamp,
+ orig['meta_timestamp'])
+
+ # Check that entire listing is available
+ headers, actual_listing = self.assert_container_listing(obj_names)
+ # ... and check some other container properties
+ self.assertEqual(headers['last-modified'],
+ pre_sharding_headers['last-modified'])
+
+ # It even works in reverse!
+ headers, listing = client.get_container(self.url, self.token,
+ self.container_name,
+ query_string='reverse=on')
+ self.assertEqual(pre_sharding_listing[::-1], listing)
+
+ # Now put some new objects into first shard, taking its count to
+ # 3 shard ranges' worth
+ more_obj_names = [
+ 'beta%03d' % x for x in range(self.max_shard_size)]
+ self.put_objects(more_obj_names)
+
+ # The listing includes new objects...
+ headers, listing = self.assert_container_listing(
+ more_obj_names + obj_names)
+ self.assertEqual(pre_sharding_listing, listing[len(more_obj_names):])
+
+ # ...but root object count is out of date until the sharders run and
+ # update the root
+ self.assert_container_object_count(len(obj_names))
+
+ # run sharders on the shard to get root updated
+ shard_1 = ShardRange.from_dict(orig_root_shard_ranges[0])
+ self.run_sharders(shard_1)
+ self.assert_container_object_count(len(more_obj_names + obj_names))
+
+ # we've added enough objects that the first shard needs to be sharded
+ # *again* into three new sub-shards, but nothing happens until the root
+ # leader identifies the shard candidate...
+ root_shard_ranges = self.direct_get_container_shard_ranges()
+ for node, (hdrs, root_shards) in root_shard_ranges.items():
+ self.assertLengthEqual(root_shards, 2)
+ with annotate_failure('node %s. ' % node):
+ self.assertEqual(
+ [ShardRange.ACTIVE] * 2,
+ [sr['state'] for sr in root_shards])
+ # orig shards 0, 1 should be contiguous
+ self.assert_shard_ranges_contiguous(2, root_shards)
+
+ # Now run the root leader to identify shard candidate...while one of
+ # the shard container servers is down
+ shard_1_part, shard_1_nodes = self.get_part_and_node_numbers(shard_1)
+ self.brain.servers.stop(number=shard_1_nodes[2])
+ self.sharders.once(number=self.brain.node_numbers[0],
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # ... so the third replica of the first shard does not move to the
+ # sharding state
+ found_for_shard = self.categorize_container_dir_content(
+ shard_1.account, shard_1.container)
+ self.assertLengthEqual(found_for_shard['normal_dbs'], 3)
+ self.assertEqual(
+ [ShardRange.SHARDING, ShardRange.SHARDING, ShardRange.ACTIVE],
+ [ContainerBroker(db_file).get_own_shard_range().state
+ for db_file in found_for_shard['normal_dbs']])
+
+ # ...then run first cycle of first shard sharders in order, leader
+ # first, to get to predictable state where all nodes have cleaved 2 out
+ # of 3 ranges...starting with first two nodes
+ for node_number in shard_1_nodes[:2]:
+ self.sharders.once(
+ number=node_number,
+ additional_args='--partitions=%s' % shard_1_part)
+
+ # ... first two replicas start sharding to sub-shards
+ found_for_shard = self.categorize_container_dir_content(
+ shard_1.account, shard_1.container)
+ self.assertLengthEqual(found_for_shard['shard_dbs'], 2)
+ for db_file in found_for_shard['shard_dbs'][:2]:
+ broker = ContainerBroker(db_file)
+ with annotate_failure('shard db file %s. ' % db_file):
+ self.assertIs(False, broker.is_root_container())
+ self.assertEqual('sharding', broker.get_db_state())
+ self.assertEqual(
+ ShardRange.SHARDING, broker.get_own_shard_range().state)
+ shard_shards = broker.get_shard_ranges()
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED,
+ ShardRange.CREATED],
+ [sr.state for sr in shard_shards])
+ self.assert_shard_ranges_contiguous(
+ 3, shard_shards,
+ first_lower=orig_root_shard_ranges[0]['lower'],
+ last_upper=orig_root_shard_ranges[0]['upper'])
+
+ # but third replica still has no idea it should be sharding
+ self.assertLengthEqual(found_for_shard['normal_dbs'], 3)
+ self.assertEqual(
+ ShardRange.ACTIVE,
+ ContainerBroker(
+ found_for_shard['normal_dbs'][2]).get_own_shard_range().state)
+
+ # ...but once sharder runs on third replica it will learn its state;
+ # note that any root replica on the stopped container server also won't
+ # know about the shards being in sharding state, so leave that server
+ # stopped for now so that the shard fetches its state from an up-to-date
+ # root replica
+ self.sharders.once(
+ number=shard_1_nodes[2],
+ additional_args='--partitions=%s' % shard_1_part)
+
+ # third replica is sharding but has no sub-shard ranges yet...
+ found_for_shard = self.categorize_container_dir_content(
+ shard_1.account, shard_1.container)
+ self.assertLengthEqual(found_for_shard['shard_dbs'], 2)
+ self.assertLengthEqual(found_for_shard['normal_dbs'], 3)
+ broker = ContainerBroker(found_for_shard['normal_dbs'][2])
+ self.assertEqual('unsharded', broker.get_db_state())
+ self.assertEqual(
+ ShardRange.SHARDING, broker.get_own_shard_range().state)
+ self.assertFalse(broker.get_shard_ranges())
+
+ # ...until sub-shard ranges are replicated from another shard replica;
+ # there may also be a sub-shard replica missing so run replicators on
+ # all nodes to fix that if necessary
+ self.brain.servers.start(number=shard_1_nodes[2])
+ self.replicators.once()
+
+ # now run sharder again on third replica
+ self.sharders.once(
+ number=shard_1_nodes[2],
+ additional_args='--partitions=%s' % shard_1_part)
+
+ # check original first shard range state and sub-shards - all replicas
+ # should now be in consistent state
+ found_for_shard = self.categorize_container_dir_content(
+ shard_1.account, shard_1.container)
+ self.assertLengthEqual(found_for_shard['shard_dbs'], 3)
+ self.assertLengthEqual(found_for_shard['normal_dbs'], 3)
+ for db_file in found_for_shard['shard_dbs']:
+ broker = ContainerBroker(db_file)
+ with annotate_failure('shard db file %s. ' % db_file):
+ self.assertIs(False, broker.is_root_container())
+ self.assertEqual('sharding', broker.get_db_state())
+ self.assertEqual(
+ ShardRange.SHARDING, broker.get_own_shard_range().state)
+ shard_shards = broker.get_shard_ranges()
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED,
+ ShardRange.CREATED],
+ [sr.state for sr in shard_shards])
+ self.assert_shard_ranges_contiguous(
+ 3, shard_shards,
+ first_lower=orig_root_shard_ranges[0]['lower'],
+ last_upper=orig_root_shard_ranges[0]['upper'])
+
+ # check third sub-shard is in created state
+ sub_shard = shard_shards[2]
+ found_for_sub_shard = self.categorize_container_dir_content(
+ sub_shard.account, sub_shard.container)
+ self.assertFalse(found_for_sub_shard['shard_dbs'])
+ self.assertLengthEqual(found_for_sub_shard['normal_dbs'], 3)
+ for db_file in found_for_sub_shard['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ with annotate_failure('sub shard db file %s. ' % db_file):
+ self.assertIs(False, broker.is_root_container())
+ self.assertEqual('unsharded', broker.get_db_state())
+ self.assertEqual(
+ ShardRange.CREATED, broker.get_own_shard_range().state)
+ self.assertFalse(broker.get_shard_ranges())
+
+ # check root shard ranges
+ root_shard_ranges = self.direct_get_container_shard_ranges()
+ for node, (hdrs, root_shards) in root_shard_ranges.items():
+ self.assertLengthEqual(root_shards, 5)
+ with annotate_failure('node %s. ' % node):
+ # shard ranges are sorted by upper, state, lower, so expect:
+ # sub-shards, orig shard 0, orig shard 1
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED,
+ ShardRange.CREATED, ShardRange.SHARDING,
+ ShardRange.ACTIVE],
+ [sr['state'] for sr in root_shards])
+ # sub-shards 0, 1, 2, orig shard 1 should be contiguous
+ self.assert_shard_ranges_contiguous(
+ 4, root_shards[:3] + root_shards[4:])
+ # orig shards 0, 1 should be contiguous
+ self.assert_shard_ranges_contiguous(2, root_shards[3:])
+
+ self.assert_container_listing(more_obj_names + obj_names)
+ self.assert_container_object_count(len(more_obj_names + obj_names))
+
+ # add another object that lands in the first of the new sub-shards
+ self.put_objects(['alpha'])
+
+ # TODO: assert that alpha is in the first new shard
+ self.assert_container_listing(['alpha'] + more_obj_names + obj_names)
+ # Run sharders again so things settle.
+ self.run_sharders(shard_1)
+
+ # check original first shard range shards
+ for db_file in found_for_shard['shard_dbs']:
+ broker = ContainerBroker(db_file)
+ with annotate_failure('shard db file %s. ' % db_file):
+ self.assertIs(False, broker.is_root_container())
+ self.assertEqual('sharded', broker.get_db_state())
+ self.assertEqual(
+ [ShardRange.ACTIVE] * 3,
+ [sr.state for sr in broker.get_shard_ranges()])
+ # check root shard ranges
+ root_shard_ranges = self.direct_get_container_shard_ranges()
+ for node, (hdrs, root_shards) in root_shard_ranges.items():
+ # old first shard range should have been deleted
+ self.assertLengthEqual(root_shards, 4)
+ with annotate_failure('node %s. ' % node):
+ self.assertEqual(
+ [ShardRange.ACTIVE] * 4,
+ [sr['state'] for sr in root_shards])
+ self.assert_shard_ranges_contiguous(4, root_shards)
+
+ headers, final_listing = self.assert_container_listing(
+ ['alpha'] + more_obj_names + obj_names)
+
+ # check root
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 3)
+ self.assertLengthEqual(found['normal_dbs'], 0)
+ new_shard_ranges = None
+ for db_file in found['shard_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual('sharded', broker.get_db_state())
+ if new_shard_ranges is None:
+ new_shard_ranges = broker.get_shard_ranges(
+ include_deleted=True)
+ self.assertLengthEqual(new_shard_ranges, 5)
+ # Second half is still there, and unchanged
+ self.assertIn(
+ dict(orig_root_shard_ranges[1], meta_timestamp=None,
+ state_timestamp=None),
+ [dict(sr, meta_timestamp=None, state_timestamp=None)
+ for sr in new_shard_ranges])
+ # But the first half split in three, then deleted
+ by_name = {sr.name: sr for sr in new_shard_ranges}
+ self.assertIn(orig_root_shard_ranges[0]['name'], by_name)
+ old_shard_range = by_name.pop(
+ orig_root_shard_ranges[0]['name'])
+ self.assertTrue(old_shard_range.deleted)
+ self.assert_shard_ranges_contiguous(4, by_name.values())
+ else:
+ # Everyone's on the same page. Well, except for
+ # meta_timestamps, since the shards each reported
+ other_shard_ranges = broker.get_shard_ranges(
+ include_deleted=True)
+ self.assert_shard_range_lists_equal(
+ new_shard_ranges, other_shard_ranges,
+ excludes=['meta_timestamp', 'state_timestamp'])
+ for orig, updated in zip(orig_root_shard_ranges,
+ other_shard_ranges):
+ self.assertGreaterEqual(updated.meta_timestamp,
+ orig['meta_timestamp'])
+
+ self.assert_container_delete_fails()
+
+ for obj in final_listing:
+ client.delete_object(
+ self.url, self.token, self.container_name, obj['name'])
+
+ # the objects won't be listed anymore
+ self.assert_container_listing([])
+ # but root container stats will not yet be aware of the deletions
+ self.assert_container_delete_fails()
+
+ # One server was down while the shard sharded its first two sub-shards,
+ # so there may be undeleted handoff db(s) for sub-shard(s) that were
+ # not fully replicated; run replicators now to clean up so they no
+ # longer report bogus stats to root.
+ self.replicators.once()
+
+ # Run sharder so that shard containers update the root. Do not run
+ # sharder on root container because that triggers shrinks which can
+ # cause root object count to temporarily be non-zero and prevent the
+ # final delete.
+ self.run_sharders(self.get_container_shard_ranges())
+ # then root is empty and can be deleted
+ self.assert_container_listing([])
+ self.assert_container_object_count(0)
+ client.delete_container(self.url, self.token, self.container_name)
+
+ def test_sharded_listing_no_replicators(self):
+ self._test_sharded_listing()
+
+ def test_sharded_listing_with_replicators(self):
+ self._test_sharded_listing(run_replicators=True)
+
+ def test_async_pendings(self):
+ obj_names = self._make_object_names(self.max_shard_size * 2)
+
+ # There are some updates *everyone* gets
+ self.put_objects(obj_names[::5])
+ # But roll some outages so each container only gets ~2/5 more object
+ # records, i.e. a total of 3/5 of the updates per container, and async
+ # pendings pile up
+ for i, n in enumerate(self.brain.node_numbers, start=1):
+ self.brain.servers.stop(number=n)
+ self.put_objects(obj_names[i::5])
+ self.brain.servers.start(number=n)
+
+ # But there are also 1/5 updates *no one* gets
+ self.brain.servers.stop()
+ self.put_objects(obj_names[4::5])
+ self.brain.servers.start()
+
+ # Shard it
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ headers = client.head_container(self.url, self.admin_token,
+ self.container_name)
+ self.assertEqual('True', headers.get('x-container-sharding'))
+
+ # sanity check
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 0)
+ self.assertLengthEqual(found['normal_dbs'], 3)
+ for db_file in found['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual(len(obj_names) * 3 // 5,
+ broker.get_info()['object_count'])
+
+ # Only run the 'leader' in charge of scanning.
+ # Each container has ~2 * max * 3/5 objects
+ # which are distributed from obj000 to obj<2 * max - 1>,
+ # so expect 3 shard ranges to be found: the first two will be complete
+ # shards with max/2 objects and lower/upper bounds spaced by approx:
+ # (2 * max - 1)/(2 * max * 3/5) * (max/2) =~ 5/6 * max
+ #
+ # Note that during this shard cycle the leader replicates to other
+ # nodes so they will end up with ~2 * max * 4/5 objects.
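+ # As a rough worked example, assuming max_shard_size = 100: each
+ # container holds ~120 of the 200 names obj000..obj199, so cutting
+ # shards of max/2 = 50 rows gives bounds roughly 83 names apart,
+ # i.e. two complete shards plus a final partial one.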
+ self.sharders.once(number=self.brain.node_numbers[0],
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # Verify that we have one shard db -- though the other normal DBs
+ # received the shard ranges that got defined
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 1)
+ node_index_zero_db = found['shard_dbs'][0]
+ broker = ContainerBroker(node_index_zero_db)
+ self.assertIs(True, broker.is_root_container())
+ self.assertEqual(SHARDING, broker.get_db_state())
+ expected_shard_ranges = broker.get_shard_ranges()
+ self.assertLengthEqual(expected_shard_ranges, 3)
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED],
+ [sr.state for sr in expected_shard_ranges])
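+ # (only the first two ranges are cleaved in this pass because the
+ # probe-test config deliberately uses a small cleave_batch_size; the
+ # third range has been created but not yet cleaved)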
+
+ # Still have all three big DBs -- we've only cleaved 2 of the 3 shard
+ # ranges that got defined
+ self.assertLengthEqual(found['normal_dbs'], 3)
+ db_states = []
+ for db_file in found['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertIs(True, broker.is_root_container())
+ db_states.append(broker.get_db_state())
+ # the sharded db had shard range meta_timestamps updated during
+ # cleaving, so we do not expect those to be equal on other nodes
+ self.assert_shard_range_lists_equal(
+ expected_shard_ranges, broker.get_shard_ranges(),
+ excludes=['meta_timestamp', 'state_timestamp', 'state'])
+ self.assertEqual(len(obj_names) * 3 // 5,
+ broker.get_info()['object_count'])
+ self.assertEqual([SHARDING, UNSHARDED, UNSHARDED], sorted(db_states))
+
+ # Run the other sharders so we're all in (roughly) the same state
+ for n in self.brain.node_numbers[1:]:
+ self.sharders.once(
+ number=n,
+ additional_args='--partitions=%s' % self.brain.part)
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 3)
+ self.assertLengthEqual(found['normal_dbs'], 3)
+ for db_file in found['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertEqual(SHARDING, broker.get_db_state())
+ # no new rows
+ self.assertEqual(len(obj_names) * 3 // 5,
+ broker.get_info()['object_count'])
+
+ # Run updaters to clear the async pendings
+ Manager(['object-updater']).once()
+
+ # Our "big" dbs didn't take updates
+ for db_file in found['normal_dbs']:
+ broker = ContainerBroker(db_file)
+ self.assertEqual(len(obj_names) * 3 // 5,
+ broker.get_info()['object_count'])
+
+ # TODO: confirm that the updates got redirected to the shards
+
+ # The entire listing is not yet available - we have two cleaved shard
+ # ranges, complete with async updates, but for the remainder of the
+ # namespace only what landed in the original container
+ headers, listing = client.get_container(self.url, self.token,
+ self.container_name)
+ start_listing = [
+ o for o in obj_names if o <= expected_shard_ranges[1].upper]
+ self.assertEqual(
+ [x['name'].encode('utf-8') for x in listing[:len(start_listing)]],
+ start_listing)
+ # we can't assert much about the remaining listing, other than that
+ # there should be something
+ self.assertTrue(
+ [x['name'].encode('utf-8') for x in listing[len(start_listing):]])
+ # Object count is hard to reason about though!
+ # TODO: nail down what this *should* be and make sure all containers
+ # respond with it! Depending on what you're looking at, this
+ # could be 0, 1/2, 7/12 (!?), 3/5, 2/3, 4/5, or all objects!
+ # Apparently, it may not even be present at all!
+ # self.assertIn('x-container-object-count', headers)
+ # self.assertEqual(headers['x-container-object-count'],
+ # str(len(obj_names) - len(obj_names) // 6))
+
+ # TODO: Doesn't work in reverse, yet
+ # headers, listing = client.get_container(self.url, self.token,
+ # self.container_name,
+ # query_string='reverse=on')
+ # self.assertEqual([x['name'].encode('utf-8') for x in listing],
+ # obj_names[::-1])
+
+ # Run the sharders again to get everything to settle
+ self.sharders.once()
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 3)
+ self.assertLengthEqual(found['normal_dbs'], 0)
+ # now all shards have been cleaved we should get the complete listing
+ headers, listing = client.get_container(self.url, self.token,
+ self.container_name)
+ self.assertEqual([x['name'].encode('utf-8') for x in listing],
+ obj_names)
+
+ def test_shrinking(self):
+ int_client = self.make_internal_client()
+
+ def check_node_data(node_data, exp_hdrs, exp_obj_count, exp_shards):
+ hdrs, range_data = node_data
+ self.assert_dict_contains(exp_hdrs, hdrs)
+ self.assert_shard_ranges_contiguous(exp_shards, range_data)
+ self.assert_total_object_count(exp_obj_count, range_data)
+
+ def check_shard_nodes_data(node_data, expected_state='unsharded',
+ expected_shards=0, exp_obj_count=0):
+ # checks that shard range data is consistent across all nodes
+ root_path = '%s/%s' % (self.account, self.container_name)
+ exp_shard_hdrs = {'X-Container-Sysmeta-Shard-Root': root_path,
+ 'X-Backend-Sharding-State': expected_state}
+ object_counts = []
+ bytes_used = []
+ for node_id, node_data in node_data.items():
+ with annotate_failure('Node id %s.' % node_id):
+ check_node_data(
+ node_data, exp_shard_hdrs, exp_obj_count,
+ expected_shards)
+ hdrs = node_data[0]
+ object_counts.append(int(hdrs['X-Container-Object-Count']))
+ bytes_used.append(int(hdrs['X-Container-Bytes-Used']))
+ if len(set(object_counts)) != 1:
+ self.fail('Inconsistent object counts: %s' % object_counts)
+ if len(set(bytes_used)) != 1:
+ self.fail('Inconsistent bytes used: %s' % bytes_used)
+ return object_counts[0], bytes_used[0]
+
+ repeat = [0]
+
+ def do_shard_then_shrink():
+ repeat[0] += 1
+ obj_names = ['obj-%s-%03d' % (repeat[0], x)
+ for x in range(self.max_shard_size)]
+ self.put_objects(obj_names)
+ # these two object names will fall at the start of the first shard
+ # range...
+ alpha = 'alpha-%s' % repeat[0]
+ beta = 'beta-%s' % repeat[0]
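+ # ('alpha-*' and 'beta-*' sort lexicographically before every
+ # 'obj-*' name, so they belong to the first shard range, whose lower
+ # bound is the empty string)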
+
+ # Enable sharding
+ client.post_container(
+ self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+
+ # sanity check
+ self.assert_container_listing(obj_names)
+
+ # Only run the one in charge of scanning
+ self.sharders.once(
+ number=self.brain.node_numbers[0],
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # check root container
+ root_nodes_data = self.direct_get_container_shard_ranges()
+ self.assertEqual(3, len(root_nodes_data))
+
+ # nodes on which sharder has not run are still in unsharded state
+ # but have had shard ranges replicated to them
+ exp_obj_count = len(obj_names)
+ exp_hdrs = {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': str(exp_obj_count)}
+ node_id = self.brain.node_numbers[1] - 1
+ check_node_data(
+ root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2)
+ node_id = self.brain.node_numbers[2] - 1
+ check_node_data(
+ root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2)
+
+ # only one that ran sharder is in sharded state
+ exp_hdrs['X-Backend-Sharding-State'] = 'sharded'
+ node_id = self.brain.node_numbers[0] - 1
+ check_node_data(
+ root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2)
+
+ orig_range_data = root_nodes_data[node_id][1]
+ orig_shard_ranges = [ShardRange.from_dict(r)
+ for r in orig_range_data]
+
+ # check first shard
+ shard_nodes_data = self.direct_get_container_shard_ranges(
+ orig_shard_ranges[0].account, orig_shard_ranges[0].container)
+ obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data)
+ total_shard_object_count = obj_count
+
+ # check second shard
+ shard_nodes_data = self.direct_get_container_shard_ranges(
+ orig_shard_ranges[1].account, orig_shard_ranges[1].container)
+ obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data)
+ total_shard_object_count += obj_count
+ self.assertEqual(exp_obj_count, total_shard_object_count)
+
+ # Now that everyone has shard ranges, run *everyone*
+ self.sharders.once(
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # all root container nodes should now be in sharded state
+ root_nodes_data = self.direct_get_container_shard_ranges()
+ self.assertEqual(3, len(root_nodes_data))
+ for node_id, node_data in root_nodes_data.items():
+ with annotate_failure('Node id %s.' % node_id):
+ check_node_data(node_data, exp_hdrs, exp_obj_count, 2)
+
+ # run updaters to update .sharded account; shard containers have
+ # not updated account since having objects replicated to them
+ self.updaters.once()
+ shard_cont_count, shard_obj_count = int_client.get_account_info(
+ orig_shard_ranges[0].account, [204])
+ self.assertEqual(2 * repeat[0], shard_cont_count)
+ self.assertEqual(len(obj_names), shard_obj_count)
+
+ # checking the listing also refreshes the proxy's container info
+ # cache so that the proxy becomes aware that the container is sharded
+ # and will now look up the shard target for subsequent updates
+ self.assert_container_listing(obj_names)
+
+ # delete objects from first shard range
+ first_shard_objects = [obj_name for obj_name in obj_names
+ if obj_name <= orig_shard_ranges[0].upper]
+ for obj in first_shard_objects:
+ client.delete_object(
+ self.url, self.token, self.container_name, obj)
+ with self.assertRaises(ClientException):
+ client.get_object(
+ self.url, self.token, self.container_name, obj)
+
+ second_shard_objects = [obj_name for obj_name in obj_names
+ if obj_name > orig_shard_ranges[1].lower]
+ self.assert_container_listing(second_shard_objects)
+
+ self.put_objects([alpha])
+ second_shard_objects = [obj_name for obj_name in obj_names
+ if obj_name > orig_shard_ranges[1].lower]
+ self.assert_container_listing([alpha] + second_shard_objects)
+
+ # while container servers are down, but proxy has container info in
+ # cache from recent listing, put another object; this update will
+ # lurk in async pending until the updaters run again
+ # TODO: because all the root container servers are down and
+ # therefore cannot respond to a GET for a redirect target, the
+ # object update will default to being targeted at the root
+ # container; can we provoke an object update that does get targeted
+ # to the shard, but fails to update shard, so that the async
+ # pending will first be directed to the shard when the updaters
+ # run?
+ self.stop_container_servers()
+ self.put_objects([beta])
+ self.brain.servers.start()
+ async_pendings = self.gather_async_pendings(
+ self.get_all_object_nodes())
+ num_container_replicas = len(self.brain.nodes)
+ num_obj_replicas = self.policy.object_ring.replica_count
+ expected_num_updates = num_container_updates(
+ num_container_replicas, quorum_size(num_container_replicas),
+ num_obj_replicas, self.policy.quorum)
+ expected_num_pendings = min(expected_num_updates, num_obj_replicas)
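+ # (all container replicas are down while beta is PUT, so each object
+ # server that is asked to send a container update saves one async
+ # pending for it - hence the cap at one pending per object replica;
+ # this is a sketch of the expectation, the precise update fan-out
+ # comes from num_container_updates)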
+ # sanity check
+ with annotate_failure('policy %s. ' % self.policy):
+ self.assertLengthEqual(async_pendings, expected_num_pendings)
+
+ # root object count is not updated...
+ self.assert_container_object_count(len(obj_names))
+ self.assert_container_listing([alpha] + second_shard_objects)
+ root_nodes_data = self.direct_get_container_shard_ranges()
+ self.assertEqual(3, len(root_nodes_data))
+ for node_id, node_data in root_nodes_data.items():
+ with annotate_failure('Node id %s.' % node_id):
+ check_node_data(node_data, exp_hdrs, exp_obj_count, 2)
+ range_data = node_data[1]
+ self.assert_shard_range_lists_equal(
+ orig_range_data, range_data,
+ excludes=['meta_timestamp', 'state_timestamp'])
+
+ # ...until the sharders run and update root
+ self.run_sharders(orig_shard_ranges[0])
+ exp_obj_count = len(second_shard_objects) + 1
+ self.assert_container_object_count(exp_obj_count)
+ self.assert_container_listing([alpha] + second_shard_objects)
+
+ # root sharder finds donor, acceptor pair and pushes changes
+ self.sharders.once(
+ additional_args='--partitions=%s' % self.brain.part)
+ self.assert_container_listing([alpha] + second_shard_objects)
+ # run sharder on donor to shrink and replicate to acceptor
+ self.run_sharders(orig_shard_ranges[0])
+ self.assert_container_listing([alpha] + second_shard_objects)
+ # run sharder on acceptor to update root with stats
+ self.run_sharders(orig_shard_ranges[1])
+ self.assert_container_listing([alpha] + second_shard_objects)
+ self.assert_container_object_count(len(second_shard_objects) + 1)
+
+ # check root container
+ root_nodes_data = self.direct_get_container_shard_ranges()
+ self.assertEqual(3, len(root_nodes_data))
+ exp_hdrs['X-Container-Object-Count'] = str(exp_obj_count)
+ for node_id, node_data in root_nodes_data.items():
+ with annotate_failure('Node id %s.' % node_id):
+ # NB now only *one* shard range in root
+ check_node_data(node_data, exp_hdrs, exp_obj_count, 1)
+
+ # the acceptor shard is intact..
+ shard_nodes_data = self.direct_get_container_shard_ranges(
+ orig_shard_ranges[1].account, orig_shard_ranges[1].container)
+ obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data)
+ # all objects should now be in this shard
+ self.assertEqual(exp_obj_count, obj_count)
+
+ # the donor shard is also still intact
+ # TODO: once we have figured out when these redundant donors are
+ # deleted, test for deletion/clean up
+ shard_nodes_data = self.direct_get_container_shard_ranges(
+ orig_shard_ranges[0].account, orig_shard_ranges[0].container)
+ # the donor's shard range will have the acceptor's projected stats
+ obj_count, bytes_used = check_shard_nodes_data(
+ shard_nodes_data, expected_state='sharded', expected_shards=1,
+ exp_obj_count=len(second_shard_objects) + 1)
+ # but the donor is empty and so reports zero stats
+ self.assertEqual(0, obj_count)
+ self.assertEqual(0, bytes_used)
+
+ # delete all the second shard's objects apart from 'alpha'
+ for obj in second_shard_objects:
+ client.delete_object(
+ self.url, self.token, self.container_name, obj)
+
+ self.assert_container_listing([alpha])
+
+ # run sharders so the second range shrinks away; this requires up to
+ # 3 cycles
+ self.sharders.once() # shard updates root stats
+ self.assert_container_listing([alpha])
+ self.sharders.once() # root finds shrinkable shard
+ self.assert_container_listing([alpha])
+ self.sharders.once() # shards shrink themselves
+ self.assert_container_listing([alpha])
+
+ # the second shard range has sharded and is empty
+ shard_nodes_data = self.direct_get_container_shard_ranges(
+ orig_shard_ranges[1].account, orig_shard_ranges[1].container)
+ check_shard_nodes_data(
+ shard_nodes_data, expected_state='sharded', expected_shards=1,
+ exp_obj_count=1)
+
+ # check root container
+ root_nodes_data = self.direct_get_container_shard_ranges()
+ self.assertEqual(3, len(root_nodes_data))
+ exp_hdrs = {'X-Backend-Sharding-State': 'collapsed',
+ # just the alpha object
+ 'X-Container-Object-Count': '1'}
+ for node_id, node_data in root_nodes_data.items():
+ with annotate_failure('Node id %s.' % node_id):
+ # NB now no shard ranges in root
+ check_node_data(node_data, exp_hdrs, 0, 0)
+
+ # delete the alpha object
+ client.delete_object(
+ self.url, self.token, self.container_name, alpha)
+ # should now be able to delete the *apparently* empty container
+ client.delete_container(self.url, self.token, self.container_name)
+ self.assert_container_not_found()
+ self.direct_head_container(expect_failure=True)
+
+ # and the container stays deleted even after sharders run and shards
+ # send updates
+ self.sharders.once()
+ self.assert_container_not_found()
+ self.direct_head_container(expect_failure=True)
+
+ # now run updaters to deal with the async pending for the beta
+ # object
+ self.updaters.once()
+ # and the container is revived!
+ self.assert_container_listing([beta])
+
+ # finally, clear out the container
+ client.delete_object(
+ self.url, self.token, self.container_name, beta)
+
+ do_shard_then_shrink()
+ # repeat from starting point of a collapsed and previously deleted
+ # container
+ do_shard_then_shrink()
+
+ def _setup_replication_scenario(self, num_shards, extra_objs=('alpha',)):
+ # Get cluster to state where 2 replicas are sharding or sharded but 3rd
+ # replica is unsharded and has an object that the first 2 are missing.
+
+ # put objects while all servers are up
+ obj_names = self._make_object_names(
+ num_shards * self.max_shard_size // 2)
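+ # (the sharder's default rows_per_shard is assumed here to be about
+ # half of max_shard_size, so this object count should yield
+ # num_shards shard ranges; the assertions made by callers are what
+ # actually pin that down)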
+ self.put_objects(obj_names)
+
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ node_numbers = self.brain.node_numbers
+
+ # run replicators first time to get sync points set
+ self.replicators.once()
+
+ # stop the leader node and one other server
+ self.stop_container_servers(slice(0, 2))
+
+ # ...then put one more object in first shard range namespace
+ self.put_objects(extra_objs)
+
+ # start leader and first other server, stop third server
+ for number in node_numbers[:2]:
+ self.brain.servers.start(number=number)
+ self.brain.servers.stop(number=node_numbers[2])
+ self.assert_container_listing(obj_names) # sanity check
+
+ # shard the container - first two shard ranges are cleaved
+ for number in node_numbers[:2]:
+ self.sharders.once(
+ number=number,
+ additional_args='--partitions=%s' % self.brain.part)
+
+ self.assert_container_listing(obj_names) # sanity check
+ return obj_names
+
+ def test_replication_to_sharding_container(self):
+ # verify that replication from an unsharded replica to a sharding
+ # replica does not replicate rows but does replicate shard ranges
+ obj_names = self._setup_replication_scenario(3)
+ for node in self.brain.nodes[:2]:
+ self.assert_container_state(node, 'sharding', 3)
+
+ # bring third server back up, run replicator
+ node_numbers = self.brain.node_numbers
+ self.brain.servers.start(number=node_numbers[2])
+ # sanity check...
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 0)
+ self.replicators.once(number=node_numbers[2])
+ # check db files unchanged
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 2)
+ self.assertLengthEqual(found['normal_dbs'], 3)
+
+ # the 'alpha' object is NOT replicated to the two sharded nodes
+ for node in self.brain.nodes[:2]:
+ broker = self.get_broker(self.brain.part, node)
+ with annotate_failure(
+ 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])):
+ self.assertFalse(broker.get_objects())
+ self.assert_container_state(node, 'sharding', 3)
+ self.brain.servers.stop(number=node_numbers[2])
+ self.assert_container_listing(obj_names)
+
+ # all nodes now have shard ranges
+ self.brain.servers.start(number=node_numbers[2])
+ node_data = self.direct_get_container_shard_ranges()
+ for node, (hdrs, shard_ranges) in node_data.items():
+ with annotate_failure(node):
+ self.assert_shard_ranges_contiguous(3, shard_ranges)
+
+ # complete cleaving third shard range on first two nodes
+ self.brain.servers.stop(number=node_numbers[2])
+ for number in node_numbers[:2]:
+ self.sharders.once(
+ number=number,
+ additional_args='--partitions=%s' % self.brain.part)
+ # ...and now they are in sharded state
+ self.assert_container_state(self.brain.nodes[0], 'sharded', 3)
+ self.assert_container_state(self.brain.nodes[1], 'sharded', 3)
+ # ...still no 'alpha' object in listing
+ self.assert_container_listing(obj_names)
+
+ # run the sharder on the third server, alpha object is included in
+ # shards that it cleaves
+ self.brain.servers.start(number=node_numbers[2])
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 3)
+ self.sharders.once(number=node_numbers[2],
+ additional_args='--partitions=%s' % self.brain.part)
+ self.assert_container_state(self.brain.nodes[2], 'sharding', 3)
+ self.sharders.once(number=node_numbers[2],
+ additional_args='--partitions=%s' % self.brain.part)
+ self.assert_container_state(self.brain.nodes[2], 'sharded', 3)
+ self.assert_container_listing(['alpha'] + obj_names)
+
+ def test_replication_to_sharded_container(self):
+ # verify that replication from an unsharded replica to a sharded
+ # replica does not replicate rows but does replicate shard ranges
+ obj_names = self._setup_replication_scenario(2)
+ for node in self.brain.nodes[:2]:
+ self.assert_container_state(node, 'sharded', 2)
+
+ # sanity check
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 2)
+ self.assertLengthEqual(found['normal_dbs'], 1)
+ for node in self.brain.nodes[:2]:
+ broker = self.get_broker(self.brain.part, node)
+ info = broker.get_info()
+ with annotate_failure(
+ 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])):
+ self.assertEqual(len(obj_names), info['object_count'])
+ self.assertFalse(broker.get_objects())
+
+ # bring third server back up, run replicator
+ node_numbers = self.brain.node_numbers
+ self.brain.servers.start(number=node_numbers[2])
+ # sanity check...
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 0)
+ self.replicators.once(number=node_numbers[2])
+ # check db files unchanged
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['shard_dbs'], 2)
+ self.assertLengthEqual(found['normal_dbs'], 1)
+
+ # the 'alpha' object is NOT replicated to the two sharded nodes
+ for node in self.brain.nodes[:2]:
+ broker = self.get_broker(self.brain.part, node)
+ with annotate_failure(
+ 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])):
+ self.assertFalse(broker.get_objects())
+ self.assert_container_state(node, 'sharded', 2)
+ self.brain.servers.stop(number=node_numbers[2])
+ self.assert_container_listing(obj_names)
+
+ # all nodes now have shard ranges
+ self.brain.servers.start(number=node_numbers[2])
+ node_data = self.direct_get_container_shard_ranges()
+ for node, (hdrs, shard_ranges) in node_data.items():
+ with annotate_failure(node):
+ self.assert_shard_ranges_contiguous(2, shard_ranges)
+
+ # run the sharder on the third server, alpha object is included in
+ # shards that it cleaves
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 2)
+ self.sharders.once(number=node_numbers[2],
+ additional_args='--partitions=%s' % self.brain.part)
+ self.assert_container_state(self.brain.nodes[2], 'sharded', 2)
+ self.assert_container_listing(['alpha'] + obj_names)
+
+ def test_sharding_requires_sufficient_replication(self):
+ # verify that cleaving only progresses if each cleaved shard range is
+ # sufficiently replicated
+
+ # put enough objects for 4 shard ranges
+ obj_names = self._make_object_names(2 * self.max_shard_size)
+ self.put_objects(obj_names)
+
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ node_numbers = self.brain.node_numbers
+ leader_node = self.brain.nodes[0]
+ leader_num = node_numbers[0]
+
+ # run replicators first time to get sync points set
+ self.replicators.once()
+
+ # start sharding on the leader node
+ self.sharders.once(number=leader_num,
+ additional_args='--partitions=%s' % self.brain.part)
+ shard_ranges = self.assert_container_state(leader_node, 'sharding', 4)
+ self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2,
+ [sr.state for sr in shard_ranges])
+
+ # stop *all* container servers for third shard range
+ sr_part, sr_node_nums = self.get_part_and_node_numbers(shard_ranges[2])
+ for node_num in sr_node_nums:
+ self.brain.servers.stop(number=node_num)
+
+ # attempt to continue sharding on the leader node
+ self.sharders.once(number=leader_num,
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # no cleaving progress was made
+ for node_num in sr_node_nums:
+ self.brain.servers.start(number=node_num)
+ shard_ranges = self.assert_container_state(leader_node, 'sharding', 4)
+ self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2,
+ [sr.state for sr in shard_ranges])
+
+ # stop two of the servers for third shard range, not including any
+ # server that happens to be the leader node
+ stopped = []
+ for node_num in sr_node_nums:
+ if node_num != leader_num:
+ self.brain.servers.stop(number=node_num)
+ stopped.append(node_num)
+ if len(stopped) >= 2:
+ break
+ self.assertLengthEqual(stopped, 2) # sanity check
+
+ # attempt to continue sharding on the leader node
+ self.sharders.once(number=leader_num,
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # no cleaving progress was made
+ for node_num in stopped:
+ self.brain.servers.start(number=node_num)
+ shard_ranges = self.assert_container_state(leader_node, 'sharding', 4)
+ self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2,
+ [sr.state for sr in shard_ranges])
+
+ # stop just one of the servers for third shard range
+ stopped = []
+ for node_num in sr_node_nums:
+ if node_num != leader_num:
+ self.brain.servers.stop(number=node_num)
+ stopped.append(node_num)
+ break
+ self.assertLengthEqual(stopped, 1) # sanity check
+
+ # attempt to continue sharding the container
+ self.sharders.once(number=leader_num,
+ additional_args='--partitions=%s' % self.brain.part)
+
+ # this time cleaving completed
+ self.brain.servers.start(number=stopped[0])
+ shard_ranges = self.assert_container_state(leader_node, 'sharded', 4)
+ self.assertEqual([ShardRange.ACTIVE] * 4,
+ [sr.state for sr in shard_ranges])
+
+ def test_sharded_delete(self):
+ all_obj_names = self._make_object_names(self.max_shard_size)
+ self.put_objects(all_obj_names)
+ # Shard the container
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ for n in self.brain.node_numbers:
+ self.sharders.once(
+ number=n, additional_args='--partitions=%s' % self.brain.part)
+ # sanity checks
+ for node in self.brain.nodes:
+ self.assert_container_state(node, 'sharded', 2)
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta()
+ self.assert_container_post_ok('sharded')
+ self.assert_container_listing(all_obj_names)
+
+ # delete all objects - updates redirected to shards
+ self.delete_objects(all_obj_names)
+ self.assert_container_listing([])
+ self.assert_container_post_ok('has objects')
+ # root not yet updated with shard stats
+ self.assert_container_object_count(len(all_obj_names))
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta()
+
+ # run sharder on shard containers to update root stats
+ shard_ranges = self.get_container_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 2)
+ self.run_sharders(shard_ranges)
+ self.assert_container_listing([])
+ self.assert_container_post_ok('empty')
+ self.assert_container_object_count(0)
+
+ # put a new object - update redirected to shard
+ self.put_objects(['alpha'])
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(0)
+
+ # before root learns about new object in shard, delete the container
+ client.delete_container(self.url, self.token, self.container_name)
+ self.assert_container_post_fails('deleted')
+ self.assert_container_not_found()
+
+ # run the sharders to update root with shard stats
+ self.run_sharders(shard_ranges)
+
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(1)
+ self.assert_container_delete_fails()
+ self.assert_container_post_ok('revived')
+
+ def test_object_update_redirection(self):
+ all_obj_names = self._make_object_names(self.max_shard_size)
+ self.put_objects(all_obj_names)
+ # Shard the container
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ for n in self.brain.node_numbers:
+ self.sharders.once(
+ number=n, additional_args='--partitions=%s' % self.brain.part)
+ # sanity checks
+ for node in self.brain.nodes:
+ self.assert_container_state(node, 'sharded', 2)
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta()
+ self.assert_container_post_ok('sharded')
+ self.assert_container_listing(all_obj_names)
+
+ # delete all objects - updates redirected to shards
+ self.delete_objects(all_obj_names)
+ self.assert_container_listing([])
+ self.assert_container_post_ok('has objects')
+
+ # run sharder on shard containers to update root stats
+ shard_ranges = self.get_container_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 2)
+ self.run_sharders(shard_ranges)
+ self.assert_container_object_count(0)
+
+ # First, test a misplaced object moving from one shard to another.
+ # with one shard server down, put a new 'alpha' object...
+ shard_part, shard_nodes = self.get_part_and_node_numbers(
+ shard_ranges[0])
+ self.brain.servers.stop(number=shard_nodes[2])
+ self.put_objects(['alpha'])
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(0)
+ self.assertLengthEqual(
+ self.gather_async_pendings(self.get_all_object_nodes()), 1)
+ self.brain.servers.start(number=shard_nodes[2])
+
+ # run sharder on root to discover first shrink candidate
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+ # then run sharder on the shard node without the alpha object
+ self.sharders.once(additional_args='--partitions=%s' % shard_part,
+ number=shard_nodes[2])
+ # root sees first shard has shrunk, only second shard range used for
+ # listing so alpha object not in listing
+ self.assertLengthEqual(self.get_container_shard_ranges(), 1)
+ self.assert_container_listing([])
+ self.assert_container_object_count(0)
+
+ # run the updaters: the async pending update will be redirected from
+ # shrunk shard to second shard
+ self.updaters.once()
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(0) # root not yet updated
+
+ # then run sharder on other shard nodes to complete shrinking
+ for number in shard_nodes[:2]:
+ self.sharders.once(additional_args='--partitions=%s' % shard_part,
+ number=number)
+ # and get root updated
+ self.run_sharders(shard_ranges[1])
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(1)
+ self.assertLengthEqual(self.get_container_shard_ranges(), 1)
+
+ # Now we have just one active shard, test a misplaced object moving
+ # from that shard to the root.
+ # with one shard server down, delete 'alpha' and put a 'beta' object...
+ shard_part, shard_nodes = self.get_part_and_node_numbers(
+ shard_ranges[1])
+ self.brain.servers.stop(number=shard_nodes[2])
+ self.delete_objects(['alpha'])
+ self.put_objects(['beta'])
+ self.assert_container_listing(['beta'])
+ self.assert_container_object_count(1)
+ self.assertLengthEqual(
+ self.gather_async_pendings(self.get_all_object_nodes()), 2)
+ self.brain.servers.start(number=shard_nodes[2])
+
+ # run sharder on root to discover second shrink candidate - root is not
+ # yet aware of the beta object
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+ # then run sharder on the shard node without the beta object, to shrink
+ # it to root - note this moves stale copy of alpha to the root db
+ self.sharders.once(additional_args='--partitions=%s' % shard_part,
+ number=shard_nodes[2])
+ # now there are no active shards
+ self.assertFalse(self.get_container_shard_ranges())
+
+ # with other two shard servers down, listing won't find beta object
+ for number in shard_nodes[:2]:
+ self.brain.servers.stop(number=number)
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(1)
+
+ # run the updaters: the async pending update will be redirected from
+ # shrunk shard to the root
+ self.updaters.once()
+ self.assert_container_listing(['beta'])
+ self.assert_container_object_count(1)
+
+ def test_misplaced_object_movement(self):
+ def merge_object(shard_range, name, deleted=0):
+ # it's hard to get a test to put a misplaced object into a shard,
+ # so this hack is used to force an object record directly into a
+ # shard container db. Note: the actual object won't exist; we're
+ # just using this to test object records in container dbs.
+ shard_part, shard_nodes = self.brain.ring.get_nodes(
+ shard_range.account, shard_range.container)
+ shard_broker = self.get_broker(
+ shard_part, shard_nodes[0], shard_range.account,
+ shard_range.container)
+ shard_broker.merge_items(
+ [{'name': name, 'created_at': Timestamp.now().internal,
+ 'size': 0, 'content_type': 'text/plain',
+ 'etag': hashlib.md5().hexdigest(), 'deleted': deleted}])
+ return shard_nodes[0]
+
+ all_obj_names = self._make_object_names(self.max_shard_size)
+ self.put_objects(all_obj_names)
+ # Shard the container
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+ for n in self.brain.node_numbers:
+ self.sharders.once(
+ number=n, additional_args='--partitions=%s' % self.brain.part)
+ # sanity checks
+ for node in self.brain.nodes:
+ self.assert_container_state(node, 'sharded', 2)
+ self.assert_container_delete_fails()
+ self.assert_container_has_shard_sysmeta()
+ self.assert_container_post_ok('sharded')
+ self.assert_container_listing(all_obj_names)
+
+ # delete all objects - updates redirected to shards
+ self.delete_objects(all_obj_names)
+ self.assert_container_listing([])
+ self.assert_container_post_ok('has objects')
+
+ # run sharder on shard containers to update root stats
+ shard_ranges = self.get_container_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 2)
+ self.run_sharders(shard_ranges)
+ self.assert_container_object_count(0)
+
+ # First, test a misplaced object moving from one shard to another.
+ # run sharder on root to discover first shrink candidate
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+ # then run sharder on first shard range to shrink it
+ shard_part, shard_nodes_numbers = self.get_part_and_node_numbers(
+ shard_ranges[0])
+ self.sharders.once(additional_args='--partitions=%s' % shard_part)
+ # force a misplaced object into the shrunken shard range to simulate
+ # a client put that was in flight when it started to shrink
+ misplaced_node = merge_object(shard_ranges[0], 'alpha', deleted=0)
+ # root sees first shard has shrunk, only second shard range used for
+ # listing so alpha object not in listing
+ self.assertLengthEqual(self.get_container_shard_ranges(), 1)
+ self.assert_container_listing([])
+ self.assert_container_object_count(0)
+ # until sharder runs on that node to move the misplaced object to the
+ # second shard range
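+ # (ring device ids are zero-based while probe-test server numbers
+ # are one-based, hence the id + 1 below)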
+ self.sharders.once(additional_args='--partitions=%s' % shard_part,
+ number=misplaced_node['id'] + 1)
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(0) # root not yet updated
+
+ # run sharder to get root updated
+ self.run_sharders(shard_ranges[1])
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(1)
+ self.assertLengthEqual(self.get_container_shard_ranges(), 1)
+
+ # Now we have just one active shard, test a misplaced object moving
+ # from that shard to the root.
+ # run sharder on root to discover second shrink candidate
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+ # then run sharder on the shard node to shrink it to root - note this
+ # moves alpha to the root db
+ shard_part, shard_nodes_numbers = self.get_part_and_node_numbers(
+ shard_ranges[1])
+ self.sharders.once(additional_args='--partitions=%s' % shard_part)
+ # now there are no active shards
+ self.assertFalse(self.get_container_shard_ranges())
+
+ # force some misplaced object updates into second shrunk shard range
+ merge_object(shard_ranges[1], 'alpha', deleted=1)
+ misplaced_node = merge_object(shard_ranges[1], 'beta', deleted=0)
+ # root is not yet aware of them
+ self.assert_container_listing(['alpha'])
+ self.assert_container_object_count(1)
+ # until sharder runs on that node to move the misplaced object
+ self.sharders.once(additional_args='--partitions=%s' % shard_part,
+ number=misplaced_node['id'] + 1)
+ self.assert_container_listing(['beta'])
+ self.assert_container_object_count(1)
+ self.assert_container_delete_fails()
+
+ def test_replication_to_sharded_container_from_unsharded_old_primary(self):
+ primary_ids = [n['id'] for n in self.brain.nodes]
+ handoff_node = next(n for n in self.brain.ring.devs
+ if n['id'] not in primary_ids)
+
+ # start with two sharded replicas and one unsharded with extra object
+ obj_names = self._setup_replication_scenario(2)
+ for node in self.brain.nodes[:2]:
+ self.assert_container_state(node, 'sharded', 2)
+
+ # Fake a ring change - copy unsharded db which has no shard ranges to a
+ # handoff to create illusion of a new unpopulated primary node
+ node_numbers = self.brain.node_numbers
+ new_primary_node = self.brain.nodes[2]
+ new_primary_node_number = node_numbers[2]
+ new_primary_dir, container_hash = self.get_storage_dir(
+ self.brain.part, new_primary_node)
+ old_primary_dir, container_hash = self.get_storage_dir(
+ self.brain.part, handoff_node)
+ utils.mkdirs(os.path.dirname(old_primary_dir))
+ os.rename(new_primary_dir, old_primary_dir)
+
+ # make the cluster more or less "healthy" again
+ self.brain.servers.start(number=new_primary_node_number)
+
+ # get a db on every node...
+ client.put_container(self.url, self.token, self.container_name)
+ self.assertTrue(os.path.exists(os.path.join(
+ new_primary_dir, container_hash + '.db')))
+ found = self.categorize_container_dir_content()
+ self.assertLengthEqual(found['normal_dbs'], 1) # "new" primary
+ self.assertLengthEqual(found['shard_dbs'], 2) # existing primaries
+
+ # catastrophic failure! drive dies and is replaced on unchanged primary
+ failed_node = self.brain.nodes[0]
+ failed_dir, _container_hash = self.get_storage_dir(
+ self.brain.part, failed_node)
+ shutil.rmtree(failed_dir)
+
+ # replicate the "old primary" to everybody except the "new primary"
+ self.brain.servers.stop(number=new_primary_node_number)
+ self.replicators.once(number=handoff_node['id'] + 1)
+
+ # We're willing to rsync the retiring db to the failed primary.
+ # This may or may not have shard ranges, depending on the order in
+ # which we hit the primaries, but it definitely *doesn't* have an
+ # epoch in its name yet. All objects are replicated.
+ self.assertTrue(os.path.exists(os.path.join(
+ failed_dir, container_hash + '.db')))
+ self.assertLengthEqual(os.listdir(failed_dir), 1)
+ broker = self.get_broker(self.brain.part, failed_node)
+ self.assertLengthEqual(broker.get_objects(), len(obj_names) + 1)
+
+ # The other out-of-date primary is within usync range but objects are
+ # not replicated to it because the handoff db learns about shard ranges
+ broker = self.get_broker(self.brain.part, self.brain.nodes[1])
+ self.assertLengthEqual(broker.get_objects(), 0)
+
+ # Handoff db still exists and now has shard ranges!
+ self.assertTrue(os.path.exists(os.path.join(
+ old_primary_dir, container_hash + '.db')))
+ broker = self.get_broker(self.brain.part, handoff_node)
+ shard_ranges = broker.get_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 2)
+ self.assert_container_state(handoff_node, 'unsharded', 2)
+
+ # Replicate again, this time *including* "new primary"
+ self.brain.servers.start(number=new_primary_node_number)
+ self.replicators.once(number=handoff_node['id'] + 1)
+
+ # Ordinarily, we would have rsync_then_merge'd to "new primary"
+ # but instead we wait
+ broker = self.get_broker(self.brain.part, new_primary_node)
+ self.assertLengthEqual(broker.get_objects(), 0)
+ shard_ranges = broker.get_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 2)
+
+ # so the next time the sharder comes along, it can push rows out
+ # and delete the big db
+ self.sharders.once(number=handoff_node['id'] + 1,
+ additional_args='--partitions=%s' % self.brain.part)
+ self.assert_container_state(handoff_node, 'sharded', 2)
+ self.assertFalse(os.path.exists(os.path.join(
+ old_primary_dir, container_hash + '.db')))
+ # the sharded db hangs around until replication confirms durability
+ # first attempt is not sufficiently successful
+ self.brain.servers.stop(number=node_numbers[0])
+ self.replicators.once(number=handoff_node['id'] + 1)
+ self.assertTrue(os.path.exists(old_primary_dir))
+ self.assert_container_state(handoff_node, 'sharded', 2)
+ # second attempt is successful and handoff db is deleted
+ self.brain.servers.start(number=node_numbers[0])
+ self.replicators.once(number=handoff_node['id'] + 1)
+ self.assertFalse(os.path.exists(old_primary_dir))
+
+ # run all the sharders, get us into a consistent state
+ self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
+ self.assert_container_listing(['alpha'] + obj_names)
+
+ def test_replication_to_empty_new_primary_from_sharding_old_primary(self):
+ primary_ids = [n['id'] for n in self.brain.nodes]
+ handoff_node = next(n for n in self.brain.ring.devs
+ if n['id'] not in primary_ids)
+ num_shards = 3
+ obj_names = self._make_object_names(
+ num_shards * self.max_shard_size // 2)
+ self.put_objects(obj_names)
+ client.post_container(self.url, self.admin_token, self.container_name,
+ headers={'X-Container-Sharding': 'on'})
+
+ # run replicators first time to get sync points set
+ self.replicators.once()
+ # start sharding on only the leader node
+ leader_node = self.brain.nodes[0]
+ leader_node_number = self.brain.node_numbers[0]
+ self.sharders.once(number=leader_node_number)
+ self.assert_container_state(leader_node, 'sharding', 3)
+ for node in self.brain.nodes[1:]:
+ self.assert_container_state(node, 'unsharded', 3)
+
+ # Fake a ring change - copy leader node db to a handoff to create
+ # illusion of a new unpopulated primary leader node
+ new_primary_dir, container_hash = self.get_storage_dir(
+ self.brain.part, leader_node)
+ old_primary_dir, container_hash = self.get_storage_dir(
+ self.brain.part, handoff_node)
+ utils.mkdirs(os.path.dirname(old_primary_dir))
+ os.rename(new_primary_dir, old_primary_dir)
+ self.assert_container_state(handoff_node, 'sharding', 3)
+
+ # run replicator on handoff node to create a fresh db on new primary
+ self.assertFalse(os.path.exists(new_primary_dir))
+ self.replicators.once(number=handoff_node['id'] + 1)
+ self.assertTrue(os.path.exists(new_primary_dir))
+ self.assert_container_state(leader_node, 'sharded', 3)
+ broker = self.get_broker(self.brain.part, leader_node)
+ shard_ranges = broker.get_shard_ranges()
+ self.assertLengthEqual(shard_ranges, 3)
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED],
+ [sr.state for sr in shard_ranges])
+
+ # db still exists on handoff
+ self.assertTrue(os.path.exists(old_primary_dir))
+ self.assert_container_state(handoff_node, 'sharding', 3)
+ # continue sharding it...
+ self.sharders.once(number=handoff_node['id'] + 1)
+ self.assert_container_state(leader_node, 'sharded', 3)
+ # now handoff is fully sharded the replicator will delete it
+ self.replicators.once(number=handoff_node['id'] + 1)
+ self.assertFalse(os.path.exists(old_primary_dir))
+
+ # all primaries now have active shard ranges but only one is in sharded
+ # state
+ self.assert_container_state(leader_node, 'sharded', 3)
+ for node in self.brain.nodes[1:]:
+ self.assert_container_state(node, 'unsharded', 3)
+ node_data = self.direct_get_container_shard_ranges()
+ for node_id, (hdrs, shard_ranges) in node_data.items():
+ with annotate_failure(
+ 'node id %s from %s' % (node_id, node_data.keys())):
+ self.assert_shard_range_state(ShardRange.ACTIVE, shard_ranges)
+
+ # check handoff cleaved all objects before it was deleted - stop all
+ # but leader node so that listing is fetched from shards
+ for number in self.brain.node_numbers[1:3]:
+ self.brain.servers.stop(number=number)
+
+ self.assert_container_listing(obj_names)
+
+ for number in self.brain.node_numbers[1:3]:
+ self.brain.servers.start(number=number)
+
+ self.sharders.once()
+ self.assert_container_state(leader_node, 'sharded', 3)
+ for node in self.brain.nodes[1:]:
+ self.assert_container_state(node, 'sharding', 3)
+ self.sharders.once()
+ for node in self.brain.nodes:
+ self.assert_container_state(node, 'sharded', 3)
+
+ self.assert_container_listing(obj_names)
diff --git a/test/unit/__init__.py b/test/unit/__init__.py
index a07b1b2879..278c55a4ca 100644
--- a/test/unit/__init__.py
+++ b/test/unit/__init__.py
@@ -751,6 +751,8 @@ class FakeStatus(object):
:param response_sleep: float, time to eventlet sleep during response
"""
# connect exception
+ if inspect.isclass(status) and issubclass(status, Exception):
+ raise status('FakeStatus Error')
if isinstance(status, (Exception, eventlet.Timeout)):
raise status
if isinstance(status, tuple):
@@ -1063,6 +1065,15 @@ def make_timestamp_iter(offset=0):
for t in itertools.count(int(time.time()) + offset))
+@contextmanager
+def mock_timestamp_now(now=None):
+ """
+ Patch Timestamp.now() to return a fixed timestamp for the duration of
+ the context; yields the timestamp in use (a fresh Timestamp.now() if
+ ``now`` is not supplied).
+ """
+ if now is None:
+ now = Timestamp.now()
+ with mocklib.patch('swift.common.utils.Timestamp.now',
+ classmethod(lambda c: now)):
+ yield now
+
+
class Timeout(object):
def __init__(self, seconds):
self.seconds = seconds
@@ -1323,3 +1334,55 @@ def skip_if_no_xattrs():
if not xattr_supported_check():
raise SkipTest('Large xattrs not supported in `%s`. Skipping test' %
gettempdir())
+
+
+def unlink_files(paths):
+ for path in paths:
+ try:
+ os.unlink(path)
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ raise
+
+
+class FakeHTTPResponse(object):
+
+ def __init__(self, resp):
+ self.resp = resp
+
+ @property
+ def status(self):
+ return self.resp.status_int
+
+ @property
+ def data(self):
+ return self.resp.body
+
+
+def attach_fake_replication_rpc(rpc, replicate_hook=None, errors=None):
+ class FakeReplConnection(object):
+
+ def __init__(self, node, partition, hash_, logger):
+ self.logger = logger
+ self.node = node
+ self.partition = partition
+ self.path = '/%s/%s/%s' % (node['device'], partition, hash_)
+ self.host = node['replication_ip']
+
+ def replicate(self, op, *sync_args):
+ print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args))
+ resp = None
+ if errors and op in errors and errors[op]:
+ resp = errors[op].pop(0)
+ if not resp:
+ replicate_args = self.path.lstrip('/').split('/')
+ args = [op] + copy.deepcopy(list(sync_args))
+ with mock_check_drive(isdir=not rpc.mount_check,
+ ismount=rpc.mount_check):
+ swob_response = rpc.dispatch(replicate_args, args)
+ resp = FakeHTTPResponse(swob_response)
+ if replicate_hook:
+ replicate_hook(op, *sync_args)
+ return resp
+
+ return FakeReplConnection
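+
+
+# Typical usage is sketched below (an assumption about the caller, not part
+# of this helper: it presumes an rpc object such as
+# swift.common.db_replicator.ReplicatorRpc built against a test datadir,
+# with the module's ReplConnection patched to the returned class):
+#
+# FakeConn = attach_fake_replication_rpc(rpc)
+# with mock.patch('swift.common.db_replicator.ReplConnection', FakeConn):
+# replicator.run_once()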
diff --git a/test/unit/account/test_server.py b/test/unit/account/test_server.py
index 2c00773441..4a8f58cb05 100644
--- a/test/unit/account/test_server.py
+++ b/test/unit/account/test_server.py
@@ -404,7 +404,7 @@ class TestAccountController(unittest.TestCase):
elif state[0] == 'race':
# Save the original db_file attribute value
self._saved_db_file = self.db_file
- self.db_file += '.doesnotexist'
+ self._db_file += '.doesnotexist'
def initialize(self, *args, **kwargs):
if state[0] == 'initial':
@@ -413,7 +413,7 @@ class TestAccountController(unittest.TestCase):
elif state[0] == 'race':
# Restore the original db_file attribute to get the race
# behavior
- self.db_file = self._saved_db_file
+ self._db_file = self._saved_db_file
return super(InterceptedAcBr, self).initialize(*args, **kwargs)
with mock.patch("swift.account.server.AccountBroker", InterceptedAcBr):
diff --git a/test/unit/cli/test_info.py b/test/unit/cli/test_info.py
index d1ea79cff3..1d5c56e9f4 100644
--- a/test/unit/cli/test_info.py
+++ b/test/unit/cli/test_info.py
@@ -31,6 +31,7 @@ from swift.cli.info import (print_db_info_metadata, print_ring_locations,
parse_get_node_args)
from swift.account.server import AccountController
from swift.container.server import ContainerController
+from swift.container.backend import UNSHARDED, SHARDED
from swift.obj.diskfile import write_metadata
@@ -103,17 +104,18 @@ class TestCliInfo(TestCliInfoBase):
self.assertRaisesMessage(ValueError, 'Info is incomplete',
print_db_info_metadata, 'container', {}, {})
- info = dict(
- account='acct',
- created_at=100.1,
- put_timestamp=106.3,
- delete_timestamp=107.9,
- status_changed_at=108.3,
- container_count='3',
- object_count='20',
- bytes_used='42')
- info['hash'] = 'abaddeadbeefcafe'
- info['id'] = 'abadf100d0ddba11'
+ info = {
+ 'account': 'acct',
+ 'created_at': 100.1,
+ 'put_timestamp': 106.3,
+ 'delete_timestamp': 107.9,
+ 'status_changed_at': 108.3,
+ 'container_count': '3',
+ 'object_count': '20',
+ 'bytes_used': '42',
+ 'hash': 'abaddeadbeefcafe',
+ 'id': 'abadf100d0ddba11',
+ }
md = {'x-account-meta-mydata': ('swift', '0000000000.00000'),
'x-other-something': ('boo', '0000000000.00000')}
out = StringIO()
@@ -154,7 +156,9 @@ No system metadata found in db file
reported_object_count='20',
reported_bytes_used='42',
x_container_foo='bar',
- x_container_bar='goo')
+ x_container_bar='goo',
+ db_state=UNSHARDED,
+ is_root=True)
info['hash'] = 'abaddeadbeefcafe'
info['id'] = 'abadf100d0ddba11'
md = {'x-container-sysmeta-mydata': ('swift', '0000000000.00000')}
@@ -182,10 +186,88 @@ Metadata:
X-Container-Bar: goo
X-Container-Foo: bar
System Metadata: {'mydata': 'swift'}
-No user metadata found in db file''' % POLICIES[0].name
+No user metadata found in db file
+Sharding Metadata:
+ Type: root
+ State: unsharded''' % POLICIES[0].name
self.assertEqual(sorted(out.getvalue().strip().split('\n')),
sorted(exp_out.split('\n')))
+ def test_print_db_info_metadata_with_shard_ranges(self):
+
+ shard_ranges = [utils.ShardRange(
+ name='.sharded_a/shard_range_%s' % i,
+ timestamp=utils.Timestamp(i), lower='%da' % i,
+ upper='%dz' % i, object_count=i, bytes_used=i,
+ meta_timestamp=utils.Timestamp(i)) for i in range(1, 4)]
+ shard_ranges[0].state = utils.ShardRange.CLEAVED
+ shard_ranges[1].state = utils.ShardRange.CREATED
+
+ info = dict(
+ account='acct',
+ container='cont',
+ storage_policy_index=0,
+ created_at='0000000100.10000',
+ put_timestamp='0000000106.30000',
+ delete_timestamp='0000000107.90000',
+ status_changed_at='0000000108.30000',
+ object_count='20',
+ bytes_used='42',
+ reported_put_timestamp='0000010106.30000',
+ reported_delete_timestamp='0000010107.90000',
+ reported_object_count='20',
+ reported_bytes_used='42',
+ db_state=SHARDED,
+ is_root=True,
+ shard_ranges=shard_ranges)
+ info['hash'] = 'abaddeadbeefcafe'
+ info['id'] = 'abadf100d0ddba11'
+ out = StringIO()
+ with mock.patch('sys.stdout', out):
+ print_db_info_metadata('container', info, {})
+ exp_out = '''Path: /acct/cont
+ Account: acct
+ Container: cont
+ Container Hash: d49d0ecbb53be1fcc49624f2f7c7ccae
+Metadata:
+ Created at: 1970-01-01T00:01:40.100000 (0000000100.10000)
+ Put Timestamp: 1970-01-01T00:01:46.300000 (0000000106.30000)
+ Delete Timestamp: 1970-01-01T00:01:47.900000 (0000000107.90000)
+ Status Timestamp: 1970-01-01T00:01:48.300000 (0000000108.30000)
+ Object Count: 20
+ Bytes Used: 42
+ Storage Policy: %s (0)
+ Reported Put Timestamp: 1970-01-01T02:48:26.300000 (0000010106.30000)
+ Reported Delete Timestamp: 1970-01-01T02:48:27.900000 (0000010107.90000)
+ Reported Object Count: 20
+ Reported Bytes Used: 42
+ Chexor: abaddeadbeefcafe
+ UUID: abadf100d0ddba11
+No system metadata found in db file
+No user metadata found in db file
+Sharding Metadata:
+ Type: root
+ State: sharded
+Shard Ranges (3):
+ Name: .sharded_a/shard_range_1
+ lower: '1a', upper: '1z'
+ Object Count: 1, Bytes Used: 1, State: cleaved (30)
+ Created at: 1970-01-01T00:00:01.000000 (0000000001.00000)
+ Meta Timestamp: 1970-01-01T00:00:01.000000 (0000000001.00000)
+ Name: .sharded_a/shard_range_2
+ lower: '2a', upper: '2z'
+ Object Count: 2, Bytes Used: 2, State: created (20)
+ Created at: 1970-01-01T00:00:02.000000 (0000000002.00000)
+ Meta Timestamp: 1970-01-01T00:00:02.000000 (0000000002.00000)
+ Name: .sharded_a/shard_range_3
+ lower: '3a', upper: '3z'
+ Object Count: 3, Bytes Used: 3, State: found (10)
+ Created at: 1970-01-01T00:00:03.000000 (0000000003.00000)
+ Meta Timestamp: 1970-01-01T00:00:03.000000 (0000000003.00000)''' %\
+ POLICIES[0].name
+ self.assertEqual(sorted(out.getvalue().strip().split('\n')),
+ sorted(exp_out.strip().split('\n')))
+
def test_print_ring_locations_invalid_args(self):
self.assertRaises(ValueError, print_ring_locations,
None, 'dir', 'acct')
@@ -423,14 +505,8 @@ No user metadata found in db file''' % POLICIES[0].name
'1', 'b47',
'dc5be2aa4347a22a0fee6bc7de505b47',
'dc5be2aa4347a22a0fee6bc7de505b47.db')
- try:
- print_info('account', db_file, swift_dir=self.testdir)
- except Exception:
- exp_raised = True
- if exp_raised:
- self.fail("Unexpected exception raised")
- else:
- self.assertGreater(len(out.getvalue().strip()), 800)
+ print_info('account', db_file, swift_dir=self.testdir)
+ self.assertGreater(len(out.getvalue().strip()), 800)
controller = ContainerController(
{'devices': self.testdir, 'mount_check': 'false'})
diff --git a/test/unit/cli/test_manage_shard_ranges.py b/test/unit/cli/test_manage_shard_ranges.py
new file mode 100644
index 0000000000..8cefa5b19c
--- /dev/null
+++ b/test/unit/cli/test_manage_shard_ranges.py
@@ -0,0 +1,362 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy
+# of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from __future__ import unicode_literals
+
+import json
+import os
+import unittest
+import mock
+from shutil import rmtree
+from tempfile import mkdtemp
+
+from six.moves import cStringIO as StringIO
+
+from swift.cli.manage_shard_ranges import main
+from swift.common import utils
+from swift.common.utils import Timestamp, ShardRange
+from swift.container.backend import ContainerBroker
+from test.unit import mock_timestamp_now
+
+
+class TestManageShardRanges(unittest.TestCase):
+ def setUp(self):
+ self.testdir = os.path.join(mkdtemp(), 'tmp_test_cli_find_shards')
+ utils.mkdirs(self.testdir)
+ rmtree(self.testdir)
+ self.shard_data = [
+ {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10},
+ {'index': 1, 'lower': 'obj09', 'upper': 'obj19',
+ 'object_count': 10},
+ {'index': 2, 'lower': 'obj19', 'upper': 'obj29',
+ 'object_count': 10},
+ {'index': 3, 'lower': 'obj29', 'upper': 'obj39',
+ 'object_count': 10},
+ {'index': 4, 'lower': 'obj39', 'upper': 'obj49',
+ 'object_count': 10},
+ {'index': 5, 'lower': 'obj49', 'upper': 'obj59',
+ 'object_count': 10},
+ {'index': 6, 'lower': 'obj59', 'upper': 'obj69',
+ 'object_count': 10},
+ {'index': 7, 'lower': 'obj69', 'upper': 'obj79',
+ 'object_count': 10},
+ {'index': 8, 'lower': 'obj79', 'upper': 'obj89',
+ 'object_count': 10},
+ {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10},
+ ]
+
+ def tearDown(self):
+ rmtree(os.path.dirname(self.testdir))
+
+ def assert_starts_with(self, value, prefix):
+ self.assertTrue(value.startswith(prefix),
+ "%r does not start with %r" % (value, prefix))
+
+ def assert_formatted_json(self, output, expected):
+ try:
+ loaded = json.loads(output)
+ except ValueError as err:
+ self.fail('Invalid JSON: %s\n%r' % (err, output))
+ # Check this one first, for a prettier diff
+ self.assertEqual(loaded, expected)
+ formatted = json.dumps(expected, sort_keys=True, indent=2) + '\n'
+ self.assertEqual(output, formatted)
+
+ def _make_broker(self, account='a', container='c',
+ device='sda', part=0):
+ datadir = os.path.join(
+ self.testdir, device, 'containers', str(part), 'ash', 'hash')
+ db_file = os.path.join(datadir, 'hash.db')
+ broker = ContainerBroker(
+ db_file, account=account, container=container)
+ broker.initialize()
+ return broker
+
+ def test_find_shard_ranges(self):
+ db_file = os.path.join(self.testdir, 'hash.db')
+ broker = ContainerBroker(db_file)
+ broker.account = 'a'
+ broker.container = 'c'
+ broker.initialize()
+ ts = utils.Timestamp.now()
+ broker.merge_items([
+ {'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0,
+ 'content_type': 'application/octet-stream', 'etag': 'not-really',
+ 'deleted': 0, 'storage_policy_index': 0,
+ 'ctype_timestamp': ts.internal, 'meta_timestamp': ts.internal}
+ for i in range(100)])
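+ # 'find <rows_per_shard>' splits the broker's object rows into
+ # candidate ranges of at most that many rows, with any remainder in
+ # the final range, as the expected output below illustrates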
+
+ # Default uses a large enough value that sharding isn't required
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([db_file, 'find'])
+ self.assert_formatted_json(out.getvalue(), [])
+ err_lines = err.getvalue().split('\n')
+ self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
+ self.assert_starts_with(err_lines[1], 'Found 0 ranges in ')
+
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([db_file, 'find', '100'])
+ self.assert_formatted_json(out.getvalue(), [])
+ err_lines = err.getvalue().split('\n')
+ self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
+ self.assert_starts_with(err_lines[1], 'Found 0 ranges in ')
+
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([db_file, 'find', '99'])
+ self.assert_formatted_json(out.getvalue(), [
+ {'index': 0, 'lower': '', 'upper': 'obj98', 'object_count': 99},
+ {'index': 1, 'lower': 'obj98', 'upper': '', 'object_count': 1},
+ ])
+ err_lines = err.getvalue().split('\n')
+ self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
+ self.assert_starts_with(err_lines[1], 'Found 2 ranges in ')
+
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([db_file, 'find', '10'])
+ self.assert_formatted_json(out.getvalue(), [
+ {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10},
+ {'index': 1, 'lower': 'obj09', 'upper': 'obj19',
+ 'object_count': 10},
+ {'index': 2, 'lower': 'obj19', 'upper': 'obj29',
+ 'object_count': 10},
+ {'index': 3, 'lower': 'obj29', 'upper': 'obj39',
+ 'object_count': 10},
+ {'index': 4, 'lower': 'obj39', 'upper': 'obj49',
+ 'object_count': 10},
+ {'index': 5, 'lower': 'obj49', 'upper': 'obj59',
+ 'object_count': 10},
+ {'index': 6, 'lower': 'obj59', 'upper': 'obj69',
+ 'object_count': 10},
+ {'index': 7, 'lower': 'obj69', 'upper': 'obj79',
+ 'object_count': 10},
+ {'index': 8, 'lower': 'obj79', 'upper': 'obj89',
+ 'object_count': 10},
+ {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10},
+ ])
+ err_lines = err.getvalue().split('\n')
+ self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
+ self.assert_starts_with(err_lines[1], 'Found 10 ranges in ')
+
+ def test_info(self):
+ broker = self._make_broker()
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ (True, Timestamp.now().internal)})
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([broker.db_file, 'info'])
+ expected = ['Sharding enabled = True',
+ 'Own shard range: None',
+ 'db_state = unsharded',
+ 'Metadata:',
+ ' X-Container-Sysmeta-Sharding = True']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+
+ retiring_db_id = broker.get_info()['id']
+ broker.merge_shard_ranges(ShardRange('.shards/cc', Timestamp.now()))
+ epoch = Timestamp.now()
+ with mock_timestamp_now(epoch) as now:
+ broker.enable_sharding(epoch)
+ self.assertTrue(broker.set_sharding_state())
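+ # while sharding, 'info' additionally reports the retiring db id and the
+ # cleaving context for that db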
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ with mock_timestamp_now(now):
+ main([broker.db_file, 'info'])
+ expected = ['Sharding enabled = True',
+ 'Own shard range: {',
+ ' "bytes_used": 0, ',
+ ' "deleted": 0, ',
+ ' "epoch": "%s", ' % epoch.internal,
+ ' "lower": "", ',
+ ' "meta_timestamp": "%s", ' % now.internal,
+ ' "name": "a/c", ',
+ ' "object_count": 0, ',
+ ' "state": "sharding", ',
+ ' "state_timestamp": "%s", ' % now.internal,
+ ' "timestamp": "%s", ' % now.internal,
+ ' "upper": ""',
+ '}',
+ 'db_state = sharding',
+ 'Retiring db id: %s' % retiring_db_id,
+ 'Cleaving context: {',
+ ' "cleave_to_row": null, ',
+ ' "cleaving_done": false, ',
+ ' "cursor": "", ',
+ ' "last_cleave_to_row": null, ',
+ ' "max_row": -1, ',
+ ' "misplaced_done": false, ',
+ ' "ranges_done": 0, ',
+ ' "ranges_todo": 0, ',
+ ' "ref": "%s"' % retiring_db_id,
+ '}',
+ 'Metadata:',
+ ' X-Container-Sysmeta-Sharding = True']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+
+ self.assertTrue(broker.set_sharded_state())
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ with mock_timestamp_now(now):
+ main([broker.db_file, 'info'])
+ expected = ['Sharding enabled = True',
+ 'Own shard range: {',
+ ' "bytes_used": 0, ',
+ ' "deleted": 0, ',
+ ' "epoch": "%s", ' % epoch.internal,
+ ' "lower": "", ',
+ ' "meta_timestamp": "%s", ' % now.internal,
+ ' "name": "a/c", ',
+ ' "object_count": 0, ',
+ ' "state": "sharding", ',
+ ' "state_timestamp": "%s", ' % now.internal,
+ ' "timestamp": "%s", ' % now.internal,
+ ' "upper": ""',
+ '}',
+ 'db_state = sharded',
+ 'Metadata:',
+ ' X-Container-Sysmeta-Sharding = True']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+
+ def test_replace(self):
+ broker = self._make_broker()
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ (True, Timestamp.now().internal)})
+ input_file = os.path.join(self.testdir, 'shards')
+ with open(input_file, 'wb') as fd:
+ json.dump(self.shard_data, fd)
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([broker.db_file, 'replace', input_file])
+ expected = [
+ 'No shard ranges found to delete.',
+ 'Injected 10 shard ranges.',
+ 'Run container-replicator to replicate them to other nodes.',
+ 'Use the enable sub-command to enable sharding.']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+ self.assertEqual(
+ [(data['lower'], data['upper']) for data in self.shard_data],
+ [(sr.lower_str, sr.upper_str) for sr in broker.get_shard_ranges()])
+
+ def _assert_enabled(self, broker, epoch):
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(ShardRange.SHARDING, own_sr.state)
+ self.assertEqual(epoch, own_sr.epoch)
+ self.assertEqual(ShardRange.MIN, own_sr.lower)
+ self.assertEqual(ShardRange.MAX, own_sr.upper)
+ self.assertEqual(
+ 'True', broker.metadata['X-Container-Sysmeta-Sharding'][0])
+
+ def test_enable(self):
+ broker = self._make_broker()
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ (True, Timestamp.now().internal)})
+ # no shard ranges
+ out = StringIO()
+ err = StringIO()
+ with self.assertRaises(SystemExit):
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([broker.db_file, 'enable'])
+ expected = ["WARNING: invalid shard ranges: ['No shard ranges.'].",
+ 'Aborting.']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+
+ # success
+ shard_ranges = []
+ for data in self.shard_data:
+ path = ShardRange.make_path(
+ '.shards_a', 'c', 'c', Timestamp.now(), data['index'])
+ shard_ranges.append(
+ ShardRange(path, Timestamp.now(), data['lower'],
+ data['upper'], data['object_count']))
+ broker.merge_shard_ranges(shard_ranges)
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ with mock_timestamp_now() as now:
+ main([broker.db_file, 'enable'])
+ expected = [
+ "Container moved to state 'sharding' with epoch %s." %
+ now.internal,
+ 'Run container-sharder on all nodes to shard the container.']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+ self._assert_enabled(broker, now)
+
+ # already enabled
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ main([broker.db_file, 'enable'])
+ expected = [
+ "Container already in state 'sharding' with epoch %s." %
+ now.internal,
+ 'No action required.',
+ 'Run container-sharder on all nodes to shard the container.']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+ self._assert_enabled(broker, now)
+
+ def test_find_replace_enable(self):
+ db_file = os.path.join(self.testdir, 'hash.db')
+ broker = ContainerBroker(db_file)
+ broker.account = 'a'
+ broker.container = 'c'
+ broker.initialize()
+ ts = utils.Timestamp.now()
+ broker.merge_items([
+ {'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0,
+ 'content_type': 'application/octet-stream', 'etag': 'not-really',
+ 'deleted': 0, 'storage_policy_index': 0,
+ 'ctype_timestamp': ts.internal, 'meta_timestamp': ts.internal}
+ for i in range(100)])
+ out = StringIO()
+ err = StringIO()
+ with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
+ with mock_timestamp_now() as now:
+ main([broker.db_file, 'find_and_replace', '10', '--enable'])
+ expected = [
+ 'No shard ranges found to delete.',
+ 'Injected 10 shard ranges.',
+ 'Run container-replicator to replicate them to other nodes.',
+ "Container moved to state 'sharding' with epoch %s." %
+ now.internal,
+ 'Run container-sharder on all nodes to shard the container.']
+ self.assertEqual(expected, out.getvalue().splitlines())
+ self.assertEqual(['Loaded db broker for a/c.'],
+ err.getvalue().splitlines())
+ self._assert_enabled(broker, now)
+ self.assertEqual(
+ [(data['lower'], data['upper']) for data in self.shard_data],
+ [(sr.lower_str, sr.upper_str) for sr in broker.get_shard_ranges()])
diff --git a/test/unit/common/test_db.py b/test/unit/common/test_db.py
index f605d0acba..6f723e13a7 100644
--- a/test/unit/common/test_db.py
+++ b/test/unit/common/test_db.py
@@ -38,7 +38,7 @@ from swift.common.constraints import \
MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE
from swift.common.db import chexor, dict_factory, get_db_connection, \
DatabaseBroker, DatabaseConnectionError, DatabaseAlreadyExists, \
- GreenDBConnection, PICKLE_PROTOCOL
+ GreenDBConnection, PICKLE_PROTOCOL, zero_like
from swift.common.utils import normalize_timestamp, mkdirs, Timestamp
from swift.common.exceptions import LockTimeout
from swift.common.swob import HTTPException
@@ -46,6 +46,30 @@ from swift.common.swob import HTTPException
from test.unit import with_tempdir
+class TestHelperFunctions(unittest.TestCase):
+
+ def test_zero_like(self):
+ expectations = {
+ # value => expected
+ None: True,
+ True: False,
+ '': True,
+ 'asdf': False,
+ 0: True,
+ 1: False,
+ '0': True,
+ '1': False,
+ }
+ errors = []
+ for value, expected in expectations.items():
+ rv = zero_like(value)
+ if rv != expected:
+ errors.append('zero_like(%r) => %r expected %r' % (
+ value, rv, expected))
+ if errors:
+ self.fail('Some unexpected return values:\n' + '\n'.join(errors))
+
+
class TestDatabaseConnectionError(unittest.TestCase):
def test_str(self):
@@ -989,6 +1013,19 @@ class TestDatabaseBroker(unittest.TestCase):
self.assertEqual(broker.get_sync(uuid3), 2)
broker.merge_syncs([{'sync_point': 5, 'remote_id': uuid2}])
self.assertEqual(broker.get_sync(uuid2), 5)
+ # max sync point sticks
+ broker.merge_syncs([{'sync_point': 5, 'remote_id': uuid2}])
+ self.assertEqual(broker.get_sync(uuid2), 5)
+ self.assertEqual(broker.get_sync(uuid3), 2)
+ broker.merge_syncs([{'sync_point': 4, 'remote_id': uuid2}])
+ self.assertEqual(broker.get_sync(uuid2), 5)
+ self.assertEqual(broker.get_sync(uuid3), 2)
+ broker.merge_syncs([{'sync_point': -1, 'remote_id': uuid2},
+ {'sync_point': 3, 'remote_id': uuid3}])
+ self.assertEqual(broker.get_sync(uuid2), 5)
+ self.assertEqual(broker.get_sync(uuid3), 3)
+ self.assertEqual(broker.get_sync(uuid2, incoming=False), 3)
+ self.assertEqual(broker.get_sync(uuid3, incoming=False), 4)
def test_get_replication_info(self):
self.get_replication_info_tester(metadata=False)
@@ -1089,11 +1126,9 @@ class TestDatabaseBroker(unittest.TestCase):
'max_row': 1, 'id': broker_uuid, 'metadata': broker_metadata})
return broker
- def test_metadata(self):
- def reclaim(broker, timestamp):
- with broker.get() as conn:
- broker._reclaim(conn, timestamp)
- conn.commit()
+ # only testing _reclaim_metadata here
+ @patch.object(DatabaseBroker, '_reclaim')
+ def test_metadata(self, mock_reclaim):
# Initializes a good broker for us
broker = self.get_replication_info_tester(metadata=True)
# Add our first item
@@ -1134,7 +1169,7 @@ class TestDatabaseBroker(unittest.TestCase):
self.assertEqual(broker.metadata['Second'],
[second_value, second_timestamp])
# Reclaim at point before second item was deleted
- reclaim(broker, normalize_timestamp(3))
+ broker.reclaim(normalize_timestamp(3), normalize_timestamp(3))
self.assertIn('First', broker.metadata)
self.assertEqual(broker.metadata['First'],
[first_value, first_timestamp])
@@ -1142,7 +1177,7 @@ class TestDatabaseBroker(unittest.TestCase):
self.assertEqual(broker.metadata['Second'],
[second_value, second_timestamp])
# Reclaim at point second item was deleted
- reclaim(broker, normalize_timestamp(4))
+ broker.reclaim(normalize_timestamp(4), normalize_timestamp(4))
self.assertIn('First', broker.metadata)
self.assertEqual(broker.metadata['First'],
[first_value, first_timestamp])
@@ -1150,11 +1185,18 @@ class TestDatabaseBroker(unittest.TestCase):
self.assertEqual(broker.metadata['Second'],
[second_value, second_timestamp])
# Reclaim after point second item was deleted
- reclaim(broker, normalize_timestamp(5))
+ broker.reclaim(normalize_timestamp(5), normalize_timestamp(5))
self.assertIn('First', broker.metadata)
self.assertEqual(broker.metadata['First'],
[first_value, first_timestamp])
self.assertNotIn('Second', broker.metadata)
+ # Delete first item (by setting to empty string)
+ first_timestamp = normalize_timestamp(6)
+ broker.update_metadata({'First': ['', first_timestamp]})
+ self.assertIn('First', broker.metadata)
+ # Check that sync_timestamp doesn't cause item to be reclaimed
+ broker.reclaim(normalize_timestamp(5), normalize_timestamp(99))
+ self.assertIn('First', broker.metadata)
def test_update_metadata_missing_container_info(self):
# Test missing container_info/container_stat row
@@ -1197,7 +1239,7 @@ class TestDatabaseBroker(unittest.TestCase):
exc = None
try:
with broker.get() as conn:
- broker._reclaim(conn, 0)
+ broker._reclaim_metadata(conn, 0)
except Exception as err:
exc = err
self.assertEqual(
@@ -1333,5 +1375,141 @@ class TestDatabaseBroker(unittest.TestCase):
else:
self.fail('Expected an exception to be raised')
+ def test_skip_commits(self):
+ broker = DatabaseBroker(':memory:')
+ self.assertTrue(broker._skip_commit_puts())
+ broker._initialize = MagicMock()
+ broker.initialize(Timestamp.now())
+ self.assertTrue(broker._skip_commit_puts())
+
+ # not initialized
+ db_file = os.path.join(self.testdir, '1.db')
+ broker = DatabaseBroker(db_file)
+ self.assertFalse(os.path.exists(broker.db_file)) # sanity check
+ self.assertTrue(broker._skip_commit_puts())
+
+ # no pending file
+ broker._initialize = MagicMock()
+ broker.initialize(Timestamp.now())
+ self.assertTrue(os.path.exists(broker.db_file)) # sanity check
+ self.assertFalse(os.path.exists(broker.pending_file)) # sanity check
+ self.assertTrue(broker._skip_commit_puts())
+
+ # pending file exists
+ with open(broker.pending_file, 'wb'):
+ pass
+ self.assertTrue(os.path.exists(broker.pending_file)) # sanity check
+ self.assertFalse(broker._skip_commit_puts())
+
+ # skip_commits is True
+ broker.skip_commits = True
+ self.assertTrue(broker._skip_commit_puts())
+
+ # re-init
+ broker = DatabaseBroker(db_file)
+ self.assertFalse(broker._skip_commit_puts())
+
+ # constructor can override
+ broker = DatabaseBroker(db_file, skip_commits=True)
+ self.assertTrue(broker._skip_commit_puts())
+
+ def test_commit_puts(self):
+ db_file = os.path.join(self.testdir, '1.db')
+ broker = DatabaseBroker(db_file)
+ broker._initialize = MagicMock()
+ broker.initialize(Timestamp.now())
+ with open(broker.pending_file, 'wb'):
+ pass
+
+ # merge given list
+ with patch.object(broker, 'merge_items') as mock_merge_items:
+ broker._commit_puts(['test'])
+ mock_merge_items.assert_called_once_with(['test'])
+
+ # load file and merge
+ with open(broker.pending_file, 'wb') as fd:
+ fd.write(':1:2:99')
+ with patch.object(broker, 'merge_items') as mock_merge_items:
+ broker._commit_puts_load = lambda l, e: l.append(e)
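+ # the stubbed loader appends raw entries, skipping the usual
+ # base64/pickle decode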
+ broker._commit_puts()
+ mock_merge_items.assert_called_once_with(['1', '2', '99'])
+ self.assertEqual(0, os.path.getsize(broker.pending_file))
+
+ # load file and merge with given list
+ with open(broker.pending_file, 'wb') as fd:
+ fd.write(':bad')
+ with patch.object(broker, 'merge_items') as mock_merge_items:
+ broker._commit_puts_load = lambda l, e: l.append(e)
+ broker._commit_puts(['not'])
+ mock_merge_items.assert_called_once_with(['not', 'bad'])
+ self.assertEqual(0, os.path.getsize(broker.pending_file))
+
+ # skip_commits True - no merge
+ db_file = os.path.join(self.testdir, '2.db')
+ broker = DatabaseBroker(db_file, skip_commits=True)
+ broker._initialize = MagicMock()
+ broker.initialize(Timestamp.now())
+ with open(broker.pending_file, 'wb') as fd:
+ fd.write(':ignored')
+ with patch.object(broker, 'merge_items') as mock_merge_items:
+ with self.assertRaises(DatabaseConnectionError) as cm:
+ broker._commit_puts(['hmmm'])
+ mock_merge_items.assert_not_called()
+ self.assertIn('commits not accepted', str(cm.exception))
+ with open(broker.pending_file, 'rb') as fd:
+ self.assertEqual(':ignored', fd.read())
+
+ def test_put_record(self):
+ db_file = os.path.join(self.testdir, '1.db')
+ broker = DatabaseBroker(db_file)
+ broker._initialize = MagicMock()
+ broker.initialize(Timestamp.now())
+
+ # pending file created and record written
+ broker.make_tuple_for_pickle = lambda x: x.upper()
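+ # stub makes the pickled payload easy to recognise ('pinky' -> 'PINKY')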
+ with patch.object(broker, '_commit_puts') as mock_commit_puts:
+ broker.put_record('pinky')
+ mock_commit_puts.assert_not_called()
+ with open(broker.pending_file, 'rb') as fd:
+ pending = fd.read()
+ items = pending.split(':')
+ self.assertEqual(['PINKY'],
+ [pickle.loads(i.decode('base64')) for i in items[1:]])
+
+ # record appended
+ with patch.object(broker, '_commit_puts') as mock_commit_puts:
+ broker.put_record('perky')
+ mock_commit_puts.assert_not_called()
+ with open(broker.pending_file, 'rb') as fd:
+ pending = fd.read()
+ items = pending.split(':')
+ self.assertEqual(['PINKY', 'PERKY'],
+ [pickle.loads(i.decode('base64')) for i in items[1:]])
+
+ # pending file above cap
+ cap = swift.common.db.PENDING_CAP
+ while os.path.getsize(broker.pending_file) < cap:
+ with open(broker.pending_file, 'ab') as fd:
+ fd.write('x' * 100000)
+ with patch.object(broker, '_commit_puts') as mock_commit_puts:
+ broker.put_record('direct')
+ mock_commit_puts.assert_called_once_with(['direct'])
+
+ # records shouldn't be put to brokers with skip_commits True because
+ # they cannot be accepted if the pending file is full
+ broker.skip_commits = True
+ with open(broker.pending_file, 'wb'):
+ # empty the pending file
+ pass
+ with patch.object(broker, '_commit_puts') as mock_commit_puts:
+ with self.assertRaises(DatabaseConnectionError) as cm:
+ broker.put_record('unwelcome')
+ self.assertIn('commits not accepted', str(cm.exception))
+ mock_commit_puts.assert_not_called()
+ with open(broker.pending_file, 'rb') as fd:
+ pending = fd.read()
+ self.assertFalse(pending)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py
index 7c4143d641..21eedb9b7d 100644
--- a/test/unit/common/test_db_replicator.py
+++ b/test/unit/common/test_db_replicator.py
@@ -16,6 +16,8 @@
from __future__ import print_function
import unittest
from contextlib import contextmanager
+
+import eventlet
import os
import logging
import errno
@@ -37,6 +39,7 @@ from swift.common.exceptions import DriveNotMounted
from swift.common.swob import HTTPException
from test import unit
+from test.unit import FakeLogger, attach_fake_replication_rpc
from test.unit.common.test_db import ExampleBroker
@@ -160,6 +163,11 @@ class ReplHttp(object):
self.set_status = set_status
replicated = False
host = 'localhost'
+ node = {
+ 'ip': '127.0.0.1',
+ 'port': '6000',
+ 'device': 'sdb',
+ }
def replicate(self, *args):
self.replicated = True
@@ -230,11 +238,27 @@ class FakeBroker(object):
'put_timestamp': 1,
'created_at': 1,
'count': 0,
+ 'max_row': 99,
+ 'id': 'ID',
+ 'metadata': {}
})
if self.stub_replication_info:
info.update(self.stub_replication_info)
return info
+ def get_max_row(self, table=None):
+ return self.get_replication_info()['max_row']
+
+ def is_reclaimable(self, now, reclaim_age):
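+ # reclaimable when empty, deleted after being put, and the delete is
+ # older than reclaim_age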
+ info = self.get_replication_info()
+ return info['count'] == 0 and (
+ (now - reclaim_age) >
+ info['delete_timestamp'] >
+ info['put_timestamp'])
+
+ def get_other_replication_items(self):
+ return None
+
def reclaim(self, item_timestamp, sync_timestamp):
pass
@@ -249,6 +273,9 @@ class FakeBroker(object):
self.put_timestamp = put_timestamp
self.delete_timestamp = delete_timestamp
+ def get_brokers(self):
+ return [self]
+
class FakeAccountBroker(FakeBroker):
db_type = 'account'
@@ -273,6 +300,7 @@ class TestDBReplicator(unittest.TestCase):
self.recon_cache = mkdtemp()
rmtree(self.recon_cache, ignore_errors=1)
os.mkdir(self.recon_cache)
+ self.logger = unit.debug_logger('test-replicator')
def tearDown(self):
for patcher in self._patchers:
@@ -287,6 +315,7 @@ class TestDBReplicator(unittest.TestCase):
def stub_delete_db(self, broker):
self.delete_db_calls.append('/path/to/file')
+ return True
def test_creation(self):
# later config should be extended to assert more config options
@@ -647,11 +676,107 @@ class TestDBReplicator(unittest.TestCase):
})
def test_replicate_object(self):
+ # verify return values from replicate_object
db_replicator.ring = FakeRingWithNodes()
- replicator = TestReplicator({})
- replicator.delete_db = self.stub_delete_db
- replicator._replicate_object('0', '/path/to/file', 'node_id')
- self.assertEqual([], self.delete_db_calls)
+ db_path = '/path/to/file'
+ replicator = TestReplicator({}, logger=FakeLogger())
+ info = FakeBroker().get_replication_info()
+ # make remote appear to be in sync
+ rinfo = {'point': info['max_row'], 'id': 'remote_id'}
+
+ class FakeResponse(object):
+ def __init__(self, status, rinfo):
+ self._status = status
+ self.data = json.dumps(rinfo)
+
+ @property
+ def status(self):
+ if isinstance(self._status, (Exception, eventlet.Timeout)):
+ raise self._status
+ return self._status
+
+ # all requests fail
+ replicate = 'swift.common.db_replicator.ReplConnection.replicate'
+ with mock.patch(replicate) as fake_replicate:
+ fake_replicate.side_effect = [
+ FakeResponse(500, None),
+ FakeResponse(500, None),
+ FakeResponse(500, None)]
+ with mock.patch.object(replicator, 'delete_db') as mock_delete:
+ res = replicator._replicate_object('0', db_path, 'node_id')
+ self.assertRaises(StopIteration, next, fake_replicate.side_effect)
+ self.assertEqual((False, [False, False, False]), res)
+ self.assertEqual(0, mock_delete.call_count)
+ self.assertFalse(replicator.logger.get_lines_for_level('error'))
+ self.assertFalse(replicator.logger.get_lines_for_level('warning'))
+ replicator.logger.clear()
+
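+ # errors and timeouts count as failed requests but do not abort the run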
+ with mock.patch(replicate) as fake_replicate:
+ fake_replicate.side_effect = [
+ FakeResponse(Exception('ugh'), None),
+ FakeResponse(eventlet.Timeout(), None),
+ FakeResponse(200, rinfo)]
+ with mock.patch.object(replicator, 'delete_db') as mock_delete:
+ res = replicator._replicate_object('0', db_path, 'node_id')
+ self.assertRaises(StopIteration, next, fake_replicate.side_effect)
+ self.assertEqual((False, [False, False, True]), res)
+ self.assertEqual(0, mock_delete.call_count)
+ lines = replicator.logger.get_lines_for_level('error')
+ self.assertIn('ERROR syncing', lines[0])
+ self.assertIn('ERROR syncing', lines[1])
+ self.assertFalse(lines[2:])
+ self.assertFalse(replicator.logger.get_lines_for_level('warning'))
+ replicator.logger.clear()
+
+ # partial success
+ with mock.patch(replicate) as fake_replicate:
+ fake_replicate.side_effect = [
+ FakeResponse(200, rinfo),
+ FakeResponse(200, rinfo),
+ FakeResponse(500, None)]
+ with mock.patch.object(replicator, 'delete_db') as mock_delete:
+ res = replicator._replicate_object('0', db_path, 'node_id')
+ self.assertRaises(StopIteration, next, fake_replicate.side_effect)
+ self.assertEqual((False, [True, True, False]), res)
+ self.assertEqual(0, mock_delete.call_count)
+ self.assertFalse(replicator.logger.get_lines_for_level('error'))
+ self.assertFalse(replicator.logger.get_lines_for_level('warning'))
+ replicator.logger.clear()
+
+ # 507 triggers additional requests
+ with mock.patch(replicate) as fake_replicate:
+ fake_replicate.side_effect = [
+ FakeResponse(200, rinfo),
+ FakeResponse(200, rinfo),
+ FakeResponse(507, None),
+ FakeResponse(507, None),
+ FakeResponse(200, rinfo)]
+ with mock.patch.object(replicator, 'delete_db') as mock_delete:
+ res = replicator._replicate_object('0', db_path, 'node_id')
+ self.assertRaises(StopIteration, next, fake_replicate.side_effect)
+ self.assertEqual((False, [True, True, False, False, True]), res)
+ self.assertEqual(0, mock_delete.call_count)
+ lines = replicator.logger.get_lines_for_level('error')
+ self.assertIn('Remote drive not mounted', lines[0])
+ self.assertIn('Remote drive not mounted', lines[1])
+ self.assertFalse(lines[2:])
+ self.assertFalse(replicator.logger.get_lines_for_level('warning'))
+ replicator.logger.clear()
+
+ # all requests succeed; node id == 'node_id' causes node to be
+ # considered a handoff so expect the db to be deleted
+ with mock.patch(replicate) as fake_replicate:
+ fake_replicate.side_effect = [
+ FakeResponse(200, rinfo),
+ FakeResponse(200, rinfo),
+ FakeResponse(200, rinfo)]
+ with mock.patch.object(replicator, 'delete_db') as mock_delete:
+ res = replicator._replicate_object('0', db_path, 'node_id')
+ self.assertRaises(StopIteration, next, fake_replicate.side_effect)
+ self.assertEqual((True, [True, True, True]), res)
+ self.assertEqual(1, mock_delete.call_count)
+ self.assertFalse(replicator.logger.get_lines_for_level('error'))
+ self.assertFalse(replicator.logger.get_lines_for_level('warning'))
def test_replicate_object_quarantine(self):
replicator = TestReplicator({})
@@ -695,8 +820,122 @@ class TestDBReplicator(unittest.TestCase):
replicator.brokerclass = FakeAccountBroker
replicator._repl_to_node = lambda *args: True
replicator.delete_db = self.stub_delete_db
- replicator._replicate_object('0', '/path/to/file', 'node_id')
+ orig_cleanup = replicator.cleanup_post_replicate
+ with mock.patch.object(replicator, 'cleanup_post_replicate',
+ side_effect=orig_cleanup) as mock_cleanup:
+ replicator._replicate_object('0', '/path/to/file', 'node_id')
+ mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3)
+ self.assertIsInstance(mock_cleanup.call_args[0][0],
+ replicator.brokerclass)
self.assertEqual(['/path/to/file'], self.delete_db_calls)
+ self.assertEqual(0, replicator.stats['failure'])
+
+ def test_replicate_object_delete_delegated_to_cleanup_post_replicate(self):
+ replicator = TestReplicator({})
+ replicator.ring = FakeRingWithNodes().Ring('path')
+ replicator.brokerclass = FakeAccountBroker
+ replicator._repl_to_node = lambda *args: True
+ replicator.delete_db = self.stub_delete_db
+
+ # cleanup succeeds
+ with mock.patch.object(replicator, 'cleanup_post_replicate',
+ return_value=True) as mock_cleanup:
+ replicator._replicate_object('0', '/path/to/file', 'node_id')
+ mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3)
+ self.assertIsInstance(mock_cleanup.call_args[0][0],
+ replicator.brokerclass)
+ self.assertFalse(self.delete_db_calls)
+ self.assertEqual(0, replicator.stats['failure'])
+ self.assertEqual(3, replicator.stats['success'])
+
+ # cleanup fails
+ replicator._zero_stats()
+ with mock.patch.object(replicator, 'cleanup_post_replicate',
+ return_value=False) as mock_cleanup:
+ replicator._replicate_object('0', '/path/to/file', 'node_id')
+ mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3)
+ self.assertIsInstance(mock_cleanup.call_args[0][0],
+ replicator.brokerclass)
+ self.assertFalse(self.delete_db_calls)
+ self.assertEqual(3, replicator.stats['failure'])
+ self.assertEqual(0, replicator.stats['success'])
+
+ # shouldbehere True - cleanup not required
+ replicator._zero_stats()
+ primary_node_id = replicator.ring.get_part_nodes('0')[0]['id']
+ with mock.patch.object(replicator, 'cleanup_post_replicate',
+ return_value=True) as mock_cleanup:
+ replicator._replicate_object('0', '/path/to/file', primary_node_id)
+ mock_cleanup.assert_not_called()
+ self.assertFalse(self.delete_db_calls)
+ self.assertEqual(0, replicator.stats['failure'])
+ self.assertEqual(2, replicator.stats['success'])
+
+ def test_cleanup_post_replicate(self):
+ replicator = TestReplicator({}, logger=self.logger)
+ replicator.ring = FakeRingWithNodes().Ring('path')
+ broker = FakeBroker()
+ replicator._repl_to_node = lambda *args: True
+ info = broker.get_replication_info()
+
+ with mock.patch.object(replicator, 'delete_db') as mock_delete_db:
+ res = replicator.cleanup_post_replicate(
+ broker, info, [False] * 3)
+ mock_delete_db.assert_not_called()
+ self.assertTrue(res)
+ self.assertEqual(['Not deleting db %s (0/3 success)' % broker.db_file],
+ replicator.logger.get_lines_for_level('debug'))
+ replicator.logger.clear()
+
+ with mock.patch.object(replicator, 'delete_db') as mock_delete_db:
+ res = replicator.cleanup_post_replicate(
+ broker, info, [True, False, True])
+ mock_delete_db.assert_not_called()
+ self.assertTrue(res)
+ self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file],
+ replicator.logger.get_lines_for_level('debug'))
+ replicator.logger.clear()
+
+ broker.stub_replication_info = {'max_row': 101}
+ with mock.patch.object(replicator, 'delete_db') as mock_delete_db:
+ res = replicator.cleanup_post_replicate(
+ broker, info, [True] * 3)
+ mock_delete_db.assert_not_called()
+ self.assertTrue(res)
+ self.assertEqual(['Not deleting db %s (2 new rows)' % broker.db_file],
+ replicator.logger.get_lines_for_level('debug'))
+ replicator.logger.clear()
+
+ broker.stub_replication_info = {'max_row': 98}
+ with mock.patch.object(replicator, 'delete_db') as mock_delete_db:
+ res = replicator.cleanup_post_replicate(
+ broker, info, [True] * 3)
+ mock_delete_db.assert_not_called()
+ self.assertTrue(res)
+ broker.stub_replication_info = None
+ self.assertEqual(['Not deleting db %s (negative max_row_delta: -1)' %
+ broker.db_file],
+ replicator.logger.get_lines_for_level('error'))
+ replicator.logger.clear()
+
+ with mock.patch.object(replicator, 'delete_db') as mock_delete_db:
+ res = replicator.cleanup_post_replicate(
+ broker, info, [True] * 3)
+ mock_delete_db.assert_called_once_with(broker)
+ self.assertTrue(res)
+ self.assertEqual(['Successfully deleted db %s' % broker.db_file],
+ replicator.logger.get_lines_for_level('debug'))
+ replicator.logger.clear()
+
+ with mock.patch.object(replicator, 'delete_db',
+ return_value=False) as mock_delete_db:
+ res = replicator.cleanup_post_replicate(
+ broker, info, [True] * 3)
+ mock_delete_db.assert_called_once_with(broker)
+ self.assertFalse(res)
+ self.assertEqual(['Failed to delete db %s' % broker.db_file],
+ replicator.logger.get_lines_for_level('debug'))
+ replicator.logger.clear()
def test_replicate_object_with_exception(self):
replicator = TestReplicator({})
@@ -949,6 +1188,8 @@ class TestDBReplicator(unittest.TestCase):
response = rpc.dispatch(('drive', 'part', 'hash'),
['rsync_then_merge', 'arg1', 'arg2'])
expected_calls = [call('/part/ash/hash/hash.db'),
+ call('/drive/tmp/arg1'),
+ call(FakeBroker.db_file),
call('/drive/tmp/arg1')]
self.assertEqual(mock_os.path.exists.call_args_list,
expected_calls)
@@ -966,7 +1207,7 @@ class TestDBReplicator(unittest.TestCase):
unit.mock_check_drive(isdir=True):
mock_os.path.exists.side_effect = [False, True]
response = rpc.dispatch(('drive', 'part', 'hash'),
- ['complete_rsync', 'arg1', 'arg2'])
+ ['complete_rsync', 'arg1'])
expected_calls = [call('/part/ash/hash/hash.db'),
call('/drive/tmp/arg1')]
self.assertEqual(mock_os.path.exists.call_args_list,
@@ -974,6 +1215,19 @@ class TestDBReplicator(unittest.TestCase):
self.assertEqual('204 No Content', response.status)
self.assertEqual(204, response.status_int)
+ with patch('swift.common.db_replicator.os',
+ new=mock.MagicMock(wraps=os)) as mock_os, \
+ unit.mock_check_drive(isdir=True):
+ mock_os.path.exists.side_effect = [False, True]
+ response = rpc.dispatch(('drive', 'part', 'hash'),
+ ['complete_rsync', 'arg1', 'arg2'])
+ expected_calls = [call('/part/ash/hash/arg2'),
+ call('/drive/tmp/arg1')]
+ self.assertEqual(mock_os.path.exists.call_args_list,
+ expected_calls)
+ self.assertEqual('204 No Content', response.status)
+ self.assertEqual(204, response.status_int)
+
def test_rsync_then_merge_db_does_not_exist(self):
rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker,
mount_check=False)
@@ -1010,7 +1264,8 @@ class TestDBReplicator(unittest.TestCase):
def mock_renamer(old, new):
self.assertEqual('/drive/tmp/arg1', old)
- self.assertEqual('/data/db.db', new)
+ # FakeBroker uses module filename as db_file!
+ self.assertEqual(__file__, new)
self._patch(patch.object, db_replicator, 'renamer', mock_renamer)
@@ -1023,17 +1278,26 @@ class TestDBReplicator(unittest.TestCase):
self.assertEqual('204 No Content', response.status)
self.assertEqual(204, response.status_int)
- def test_complete_rsync_db_does_not_exist(self):
+ def test_complete_rsync_db_exists(self):
rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker,
mount_check=False)
+ with patch('swift.common.db_replicator.os',
+ new=mock.MagicMock(wraps=os)) as mock_os, \
+ unit.mock_check_drive(isdir=True):
+ mock_os.path.exists.return_value = True
+ response = rpc.complete_rsync('drive', '/data/db.db', ['arg1'])
+ mock_os.path.exists.assert_called_with('/data/db.db')
+ self.assertEqual('404 Not Found', response.status)
+ self.assertEqual(404, response.status_int)
+
with patch('swift.common.db_replicator.os',
new=mock.MagicMock(wraps=os)) as mock_os, \
unit.mock_check_drive(isdir=True):
mock_os.path.exists.return_value = True
response = rpc.complete_rsync('drive', '/data/db.db',
['arg1', 'arg2'])
- mock_os.path.exists.assert_called_with('/data/db.db')
+ mock_os.path.exists.assert_called_with('/data/arg2')
self.assertEqual('404 Not Found', response.status)
self.assertEqual(404, response.status_int)
@@ -1046,37 +1310,57 @@ class TestDBReplicator(unittest.TestCase):
unit.mock_check_drive(isdir=True):
mock_os.path.exists.return_value = False
response = rpc.complete_rsync('drive', '/data/db.db',
- ['arg1', 'arg2'])
+ ['arg1'])
expected_calls = [call('/data/db.db'), call('/drive/tmp/arg1')]
self.assertEqual(expected_calls,
mock_os.path.exists.call_args_list)
self.assertEqual('404 Not Found', response.status)
self.assertEqual(404, response.status_int)
+ with patch('swift.common.db_replicator.os',
+ new=mock.MagicMock(wraps=os)) as mock_os, \
+ unit.mock_check_drive(isdir=True):
+ mock_os.path.exists.return_value = False
+ response = rpc.complete_rsync('drive', '/data/db.db',
+ ['arg1', 'arg2'])
+ expected_calls = [call('/data/arg2'), call('/drive/tmp/arg1')]
+ self.assertEqual(expected_calls,
+ mock_os.path.exists.call_args_list)
+ self.assertEqual('404 Not Found', response.status)
+ self.assertEqual(404, response.status_int)
+
def test_complete_rsync_rename(self):
rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker,
mount_check=False)
- def mock_exists(path):
- if path == '/data/db.db':
- return False
- self.assertEqual('/drive/tmp/arg1', path)
- return True
-
def mock_renamer(old, new):
- self.assertEqual('/drive/tmp/arg1', old)
- self.assertEqual('/data/db.db', new)
+ renamer_calls.append((old, new))
self._patch(patch.object, db_replicator, 'renamer', mock_renamer)
+ renamer_calls = []
+ with patch('swift.common.db_replicator.os',
+ new=mock.MagicMock(wraps=os)) as mock_os, \
+ unit.mock_check_drive(isdir=True):
+ mock_os.path.exists.side_effect = [False, True]
+ response = rpc.complete_rsync('drive', '/data/db.db',
+ ['arg1'])
+ self.assertEqual('204 No Content', response.status)
+ self.assertEqual(204, response.status_int)
+ self.assertEqual(('/drive/tmp/arg1', '/data/db.db'), renamer_calls[0])
+ self.assertFalse(renamer_calls[1:])
+
+ renamer_calls = []
with patch('swift.common.db_replicator.os',
new=mock.MagicMock(wraps=os)) as mock_os, \
unit.mock_check_drive(isdir=True):
mock_os.path.exists.side_effect = [False, True]
response = rpc.complete_rsync('drive', '/data/db.db',
['arg1', 'arg2'])
- self.assertEqual('204 No Content', response.status)
- self.assertEqual(204, response.status_int)
+ self.assertEqual('204 No Content', response.status)
+ self.assertEqual(204, response.status_int)
+ self.assertEqual(('/drive/tmp/arg1', '/data/arg2'), renamer_calls[0])
+ self.assertFalse(renamer_calls[1:])
def test_replicator_sync_with_broker_replication_missing_table(self):
rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker,
@@ -1435,10 +1719,10 @@ class TestDBReplicator(unittest.TestCase):
db_file = __file__
replicator = TestReplicator({})
replicator._http_connect(node, partition, db_file)
+ expected_hsh = os.path.basename(db_file).split('.', 1)[0]
+ expected_hsh = expected_hsh.split('_', 1)[0]
db_replicator.ReplConnection.assert_has_calls([
- mock.call(node, partition,
- os.path.basename(db_file).split('.', 1)[0],
- replicator.logger)])
+ mock.call(node, partition, expected_hsh, replicator.logger)])
class TestHandoffsOnly(unittest.TestCase):
@@ -1740,7 +2024,7 @@ class TestReplToNode(unittest.TestCase):
def test_repl_to_node_300_status(self):
self.http = ReplHttp('{"id": 3, "point": -1}', set_status=300)
- self.assertIsNone(self.replicator._repl_to_node(
+ self.assertFalse(self.replicator._repl_to_node(
self.fake_node, FakeBroker(), '0', self.fake_info))
def test_repl_to_node_not_response(self):
@@ -1769,45 +2053,6 @@ class TestReplToNode(unittest.TestCase):
])
-class FakeHTTPResponse(object):
-
- def __init__(self, resp):
- self.resp = resp
-
- @property
- def status(self):
- return self.resp.status_int
-
- @property
- def data(self):
- return self.resp.body
-
-
-def attach_fake_replication_rpc(rpc, replicate_hook=None):
- class FakeReplConnection(object):
-
- def __init__(self, node, partition, hash_, logger):
- self.logger = logger
- self.node = node
- self.partition = partition
- self.path = '/%s/%s/%s' % (node['device'], partition, hash_)
- self.host = node['replication_ip']
-
- def replicate(self, op, *sync_args):
- print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args))
- replicate_args = self.path.lstrip('/').split('/')
- args = [op] + list(sync_args)
- with unit.mock_check_drive(isdir=not rpc.mount_check,
- ismount=rpc.mount_check):
- swob_response = rpc.dispatch(replicate_args, args)
- resp = FakeHTTPResponse(swob_response)
- if replicate_hook:
- replicate_hook(op, *sync_args)
- return resp
-
- return FakeReplConnection
-
-
class ExampleReplicator(db_replicator.Replicator):
server_type = 'fake'
brokerclass = ExampleBroker
@@ -1872,15 +2117,19 @@ class TestReplicatorSync(unittest.TestCase):
conf.update(conf_updates)
return self.replicator_daemon(conf, logger=self.logger)
- def _run_once(self, node, conf_updates=None, daemon=None):
- daemon = daemon or self._get_daemon(node, conf_updates)
-
+ def _install_fake_rsync_file(self, daemon, captured_calls=None):
def _rsync_file(db_file, remote_file, **kwargs):
+ if captured_calls is not None:
+ captured_calls.append((db_file, remote_file, kwargs))
remote_server, remote_path = remote_file.split('/', 1)
dest_path = os.path.join(self.root, remote_path)
copy(db_file, dest_path)
return True
daemon._rsync_file = _rsync_file
+
+ def _run_once(self, node, conf_updates=None, daemon=None):
+ daemon = daemon or self._get_daemon(node, conf_updates)
+ self._install_fake_rsync_file(daemon)
with mock.patch('swift.common.db_replicator.whataremyips',
new=lambda *a, **kw: [node['replication_ip']]), \
unit.mock_check_drive(isdir=not daemon.mount_check,
diff --git a/test/unit/common/test_direct_client.py b/test/unit/common/test_direct_client.py
index a832f31c6f..fc2dffc696 100644
--- a/test/unit/common/test_direct_client.py
+++ b/test/unit/common/test_direct_client.py
@@ -95,6 +95,11 @@ def mocked_http_conn(*args, **kwargs):
yield fake_conn
+@contextmanager
+def noop_timeout(duration):
+ yield
+
+
@patch_policies
class TestDirectClient(unittest.TestCase):
@@ -117,6 +122,10 @@ class TestDirectClient(unittest.TestCase):
self.account, self.container, self.obj))
self.user_agent = 'direct-client %s' % os.getpid()
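+ # make direct_client's Timeout a no-op so the mocked requests never
+ # depend on real eventlet timers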
+ patcher = mock.patch.object(direct_client, 'Timeout', noop_timeout)
+ patcher.start()
+ self.addCleanup(patcher.stop)
+
def test_gen_headers(self):
stub_user_agent = 'direct-client %s' % os.getpid()
@@ -450,6 +459,67 @@ class TestDirectClient(unittest.TestCase):
self.assertEqual(err.http_status, 500)
self.assertTrue('DELETE' in str(err))
+ def test_direct_put_container(self):
+ body = 'Let us begin with a quick introduction'
+ headers = {'x-foo': 'bar', 'Content-Length': str(len(body)),
+ 'Content-Type': 'application/json',
+ 'User-Agent': 'my UA'}
+
+ with mocked_http_conn(204) as conn:
+ rv = direct_client.direct_put_container(
+ self.node, self.part, self.account, self.container,
+ contents=body, headers=headers)
+ self.assertEqual(conn.host, self.node['ip'])
+ self.assertEqual(conn.port, self.node['port'])
+ self.assertEqual(conn.method, 'PUT')
+ self.assertEqual(conn.path, self.container_path)
+ self.assertEqual(conn.req_headers['Content-Length'],
+ str(len(body)))
+ self.assertEqual(conn.req_headers['Content-Type'],
+ 'application/json')
+ self.assertEqual(conn.req_headers['User-Agent'], 'my UA')
+ self.assertTrue('x-timestamp' in conn.req_headers)
+ self.assertEqual('bar', conn.req_headers.get('x-foo'))
+ self.assertEqual(md5(body).hexdigest(), conn.etag.hexdigest())
+ self.assertIsNone(rv)
+
+ def test_direct_put_container_chunked(self):
+ body = 'Let us begin with a quick introduction'
+ headers = {'x-foo': 'bar', 'Content-Type': 'application/json'}
+
+ with mocked_http_conn(204) as conn:
+ rv = direct_client.direct_put_container(
+ self.node, self.part, self.account, self.container,
+ contents=body, headers=headers)
+ self.assertEqual(conn.host, self.node['ip'])
+ self.assertEqual(conn.port, self.node['port'])
+ self.assertEqual(conn.method, 'PUT')
+ self.assertEqual(conn.path, self.container_path)
+ self.assertEqual(conn.req_headers['Transfer-Encoding'], 'chunked')
+ self.assertEqual(conn.req_headers['Content-Type'],
+ 'application/json')
+ self.assertTrue('x-timestamp' in conn.req_headers)
+ self.assertEqual('bar', conn.req_headers.get('x-foo'))
+ self.assertNotIn('Content-Length', conn.req_headers)
+ expected_sent = '%0x\r\n%s\r\n0\r\n\r\n' % (len(body), body)
+ self.assertEqual(md5(expected_sent).hexdigest(),
+ conn.etag.hexdigest())
+ self.assertIsNone(rv)
+
+ def test_direct_put_container_fail(self):
+ with mock.patch('swift.common.bufferedhttp.http_connect_raw',
+ side_effect=Exception('conn failed')):
+ with self.assertRaises(Exception) as cm:
+ direct_client.direct_put_container(
+ self.node, self.part, self.account, self.container)
+ self.assertEqual('conn failed', str(cm.exception))
+
+ with mocked_http_conn(Exception('resp failed')):
+ with self.assertRaises(Exception) as cm:
+ direct_client.direct_put_container(
+ self.node, self.part, self.account, self.container)
+ self.assertEqual('resp failed', str(cm.exception))
+
def test_direct_put_container_object(self):
headers = {'x-foo': 'bar'}
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index b9caaabf34..7abad33ec2 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -15,7 +15,11 @@
"""Tests for swift.common.utils"""
from __future__ import print_function
-from test.unit import temptree, debug_logger, make_timestamp_iter, with_tempdir
+
+import hashlib
+
+from test.unit import temptree, debug_logger, make_timestamp_iter, \
+ with_tempdir, mock_timestamp_now
import ctypes
import contextlib
@@ -1454,6 +1458,15 @@ class TestUtils(unittest.TestCase):
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
+ # nested dict items are not sticky
+ submit_dict = {'key1': {'key2': {'value3': 3}}}
+ expect_dict = {'key0': 101,
+ 'key1': {'key2': {'value3': 3},
+ 'value1': 1, 'value2': 2}}
+ utils.dump_recon_cache(submit_dict, testcache_file, logger)
+ with open(testcache_file) as fd:
+ file_dict = json.loads(fd.readline())
+ self.assertEqual(expect_dict, file_dict)
# cached entries are sticky
submit_dict = {}
utils.dump_recon_cache(submit_dict, testcache_file, logger)
@@ -2753,6 +2766,53 @@ cluster_dfw1 = http://dfw1.host/v1/
else:
self.assertEqual(expected, rv)
+ def test_config_float_value(self):
+ for args, expected in (
+ ((99, None, None), 99.0),
+ ((99.01, None, None), 99.01),
+ (('99', None, None), 99.0),
+ (('99.01', None, None), 99.01),
+ ((99, 99, None), 99.0),
+ ((99.01, 99.01, None), 99.01),
+ (('99', 99, None), 99.0),
+ (('99.01', 99.01, None), 99.01),
+ ((99, None, 99), 99.0),
+ ((99.01, None, 99.01), 99.01),
+ (('99', None, 99), 99.0),
+ (('99.01', None, 99.01), 99.01),
+ ((-99, -99, -99), -99.0),
+ ((-99.01, -99.01, -99.01), -99.01),
+ (('-99', -99, -99), -99.0),
+ (('-99.01', -99.01, -99.01), -99.01),):
+ actual = utils.config_float_value(*args)
+ self.assertEqual(expected, actual)
+
+ for val, minimum in ((99, 100),
+ ('99', 100),
+ (-99, -98),
+ ('-98.01', -98)):
+ with self.assertRaises(ValueError) as cm:
+ utils.config_float_value(val, minimum=minimum)
+ self.assertIn('greater than %s' % minimum, cm.exception.args[0])
+ self.assertNotIn('less than', cm.exception.args[0])
+
+ for val, maximum in ((99, 98),
+ ('99', 98),
+ (-99, -100),
+ ('-97.9', -98)):
+ with self.assertRaises(ValueError) as cm:
+ utils.config_float_value(val, maximum=maximum)
+ self.assertIn('less than %s' % maximum, cm.exception.args[0])
+ self.assertNotIn('greater than', cm.exception.args[0])
+
+ for val, minimum, maximum in ((99, 99, 98),
+ ('99', 100, 100),
+ (99, 98, 98),):
+ with self.assertRaises(ValueError) as cm:
+ utils.config_float_value(val, minimum=minimum, maximum=maximum)
+ self.assertIn('greater than %s' % minimum, cm.exception.args[0])
+ self.assertIn('less than %s' % maximum, cm.exception.args[0])
+
def test_config_auto_int_value(self):
expectations = {
# (value, default) : expected,
@@ -3807,6 +3867,105 @@ cluster_dfw1 = http://dfw1.host/v1/
if tempdir:
shutil.rmtree(tempdir)
+ def test_find_shard_range(self):
+ ts = utils.Timestamp.now().internal
+ start = utils.ShardRange('a/-a', ts, '', 'a')
+ atof = utils.ShardRange('a/a-f', ts, 'a', 'f')
+ ftol = utils.ShardRange('a/f-l', ts, 'f', 'l')
+ ltor = utils.ShardRange('a/l-r', ts, 'l', 'r')
+ rtoz = utils.ShardRange('a/r-z', ts, 'r', 'z')
+ end = utils.ShardRange('a/z-', ts, 'z', '')
+ ranges = [start, atof, ftol, ltor, rtoz, end]
+
+ found = utils.find_shard_range('', ranges)
+ self.assertEqual(found, None)
+ found = utils.find_shard_range(' ', ranges)
+ self.assertEqual(found, start)
+ found = utils.find_shard_range(' ', ranges[1:])
+ self.assertEqual(found, None)
+ found = utils.find_shard_range('b', ranges)
+ self.assertEqual(found, atof)
+ found = utils.find_shard_range('f', ranges)
+ self.assertEqual(found, atof)
+ found = utils.find_shard_range('f\x00', ranges)
+ self.assertEqual(found, ftol)
+ found = utils.find_shard_range('x', ranges)
+ self.assertEqual(found, rtoz)
+ found = utils.find_shard_range('r', ranges)
+ self.assertEqual(found, ltor)
+ found = utils.find_shard_range('}', ranges)
+ self.assertEqual(found, end)
+ found = utils.find_shard_range('}', ranges[:-1])
+ self.assertEqual(found, None)
+ # remove l-r from list of ranges and try and find a shard range for an
+ # item in that range.
+ found = utils.find_shard_range('p', ranges[:-3] + ranges[-2:])
+ self.assertEqual(found, None)
+
+ # add some sub-shards; a sub-shard's state is less than its parent
+ # while the parent is undeleted, so insert these ahead of the
+ # overlapping parent in the list of ranges
+ ftoh = utils.ShardRange('a/f-h', ts, 'f', 'h')
+ htok = utils.ShardRange('a/h-k', ts, 'h', 'k')
+
+ overlapping_ranges = ranges[:2] + [ftoh, htok] + ranges[2:]
+ found = utils.find_shard_range('g', overlapping_ranges)
+ self.assertEqual(found, ftoh)
+ found = utils.find_shard_range('h', overlapping_ranges)
+ self.assertEqual(found, ftoh)
+ found = utils.find_shard_range('k', overlapping_ranges)
+ self.assertEqual(found, htok)
+ found = utils.find_shard_range('l', overlapping_ranges)
+ self.assertEqual(found, ftol)
+ found = utils.find_shard_range('m', overlapping_ranges)
+ self.assertEqual(found, ltor)
+
+ ktol = utils.ShardRange('a/k-l', ts, 'k', 'l')
+ overlapping_ranges = ranges[:2] + [ftoh, htok, ktol] + ranges[2:]
+ found = utils.find_shard_range('l', overlapping_ranges)
+ self.assertEqual(found, ktol)
+
+ def test_parse_db_filename(self):
+ actual = utils.parse_db_filename('hash.db')
+ self.assertEqual(('hash', None, '.db'), actual)
+ actual = utils.parse_db_filename('hash_1234567890.12345.db')
+ self.assertEqual(('hash', '1234567890.12345', '.db'), actual)
+ actual = utils.parse_db_filename(
+ '/dev/containers/part/ash/hash/hash_1234567890.12345.db')
+ self.assertEqual(('hash', '1234567890.12345', '.db'), actual)
+ self.assertRaises(ValueError, utils.parse_db_filename, '/path/to/dir/')
+ # These shouldn't come up in practice; included for completeness
+ self.assertEqual(utils.parse_db_filename('hashunder_.db'),
+ ('hashunder', '', '.db'))
+ self.assertEqual(utils.parse_db_filename('lots_of_underscores.db'),
+ ('lots', 'of', '.db'))
+
+ def test_make_db_file_path(self):
+ epoch = utils.Timestamp.now()
+ actual = utils.make_db_file_path('hash.db', epoch)
+ self.assertEqual('hash_%s.db' % epoch.internal, actual)
+
+ actual = utils.make_db_file_path('hash_oldepoch.db', epoch)
+ self.assertEqual('hash_%s.db' % epoch.internal, actual)
+
+ actual = utils.make_db_file_path('/path/to/hash.db', epoch)
+ self.assertEqual('/path/to/hash_%s.db' % epoch.internal, actual)
+
+ epoch = utils.Timestamp.now()
+ actual = utils.make_db_file_path(actual, epoch)
+ self.assertEqual('/path/to/hash_%s.db' % epoch.internal, actual)
+
+ # epochs shouldn't have offsets
+ epoch = utils.Timestamp.now(offset=10)
+ actual = utils.make_db_file_path(actual, epoch)
+ self.assertEqual('/path/to/hash_%s.db' % epoch.normal, actual)
+
+ self.assertRaises(ValueError, utils.make_db_file_path,
+ '/path/to/hash.db', 'bad epoch')
+
+ self.assertRaises(ValueError, utils.make_db_file_path,
+ '/path/to/hash.db', None)
+
def test_modify_priority(self):
pid = os.getpid()
logger = debug_logger()
@@ -4097,6 +4256,115 @@ cluster_dfw1 = http://dfw1.host/v1/
# iterators
self.assertListEqual([1, 4, 6, 2, 5, 7, 3, 8, 9], got)
+ @with_tempdir
+ def test_get_db_files(self, tempdir):
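+ # get_db_files should return the existing .db files that share the
+ # given path's hash, in sorted order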
+ dbdir = os.path.join(tempdir, 'dbdir')
+ self.assertEqual([], utils.get_db_files(dbdir))
+ path_1 = os.path.join(dbdir, 'dbfile.db')
+ self.assertEqual([], utils.get_db_files(path_1))
+ os.mkdir(dbdir)
+ self.assertEqual([], utils.get_db_files(path_1))
+ with open(path_1, 'wb'):
+ pass
+ self.assertEqual([path_1], utils.get_db_files(path_1))
+
+ path_2 = os.path.join(dbdir, 'dbfile_2.db')
+ self.assertEqual([path_1], utils.get_db_files(path_2))
+
+ with open(path_2, 'wb'):
+ pass
+
+ self.assertEqual([path_1, path_2], utils.get_db_files(path_1))
+ self.assertEqual([path_1, path_2], utils.get_db_files(path_2))
+
+ path_3 = os.path.join(dbdir, 'dbfile_3.db')
+ self.assertEqual([path_1, path_2], utils.get_db_files(path_3))
+
+ with open(path_3, 'wb'):
+ pass
+
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_1))
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_2))
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_3))
+
+ other_hash = os.path.join(dbdir, 'other.db')
+ self.assertEqual([], utils.get_db_files(other_hash))
+ other_hash = os.path.join(dbdir, 'other_1.db')
+ self.assertEqual([], utils.get_db_files(other_hash))
+
+ pending = os.path.join(dbdir, 'dbfile.pending')
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(pending))
+
+ with open(pending, 'wb'):
+ pass
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(pending))
+
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_1))
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_2))
+ self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_3))
+ self.assertEqual([], utils.get_db_files(dbdir))
+
+ os.unlink(path_1)
+ self.assertEqual([path_2, path_3], utils.get_db_files(path_1))
+ self.assertEqual([path_2, path_3], utils.get_db_files(path_2))
+ self.assertEqual([path_2, path_3], utils.get_db_files(path_3))
+
+ os.unlink(path_2)
+ self.assertEqual([path_3], utils.get_db_files(path_1))
+ self.assertEqual([path_3], utils.get_db_files(path_2))
+ self.assertEqual([path_3], utils.get_db_files(path_3))
+
+ os.unlink(path_3)
+ self.assertEqual([], utils.get_db_files(path_1))
+ self.assertEqual([], utils.get_db_files(path_2))
+ self.assertEqual([], utils.get_db_files(path_3))
+ self.assertEqual([], utils.get_db_files('/path/to/nowhere'))
+
+ def test_get_redirect_data(self):
+ ts_now = utils.Timestamp.now()
+ headers = {'X-Backend-Redirect-Timestamp': ts_now.internal}
+ response = FakeResponse(200, headers, '')
+ self.assertIsNone(utils.get_redirect_data(response))
+
+ headers = {'Location': '/a/c/o',
+ 'X-Backend-Redirect-Timestamp': ts_now.internal}
+ response = FakeResponse(200, headers, '')
+ path, ts = utils.get_redirect_data(response)
+ self.assertEqual('a/c', path)
+ self.assertEqual(ts_now, ts)
+
+ headers = {'Location': '/a/c',
+ 'X-Backend-Redirect-Timestamp': ts_now.internal}
+ response = FakeResponse(200, headers, '')
+ path, ts = utils.get_redirect_data(response)
+ self.assertEqual('a/c', path)
+ self.assertEqual(ts_now, ts)
+
+ def do_test(headers):
+ response = FakeResponse(200, headers, '')
+ with self.assertRaises(ValueError) as cm:
+ utils.get_redirect_data(response)
+ return cm.exception
+
+ exc = do_test({'Location': '/a',
+ 'X-Backend-Redirect-Timestamp': ts_now.internal})
+ self.assertIn('Invalid path', str(exc))
+
+ exc = do_test({'Location': '',
+ 'X-Backend-Redirect-Timestamp': ts_now.internal})
+ self.assertIn('Invalid path', str(exc))
+
+ exc = do_test({'Location': '/a/c',
+ 'X-Backend-Redirect-Timestamp': 'bad'})
+ self.assertIn('Invalid timestamp', str(exc))
+
+ exc = do_test({'Location': '/a/c'})
+ self.assertIn('Invalid timestamp', str(exc))
+
+ exc = do_test({'Location': '/a/c',
+ 'X-Backend-Redirect-Timestamp': '-1'})
+ self.assertIn('Invalid timestamp', str(exc))
+
class ResellerConfReader(unittest.TestCase):
@@ -6656,5 +6924,828 @@ class TestDistributeEvenly(unittest.TestCase):
self.assertEqual(out, [[0], [1], [2], [3], [4], [], []])
+class TestShardRange(unittest.TestCase):
+ def setUp(self):
+ self.ts_iter = make_timestamp_iter()
+
+ def test_min_max_bounds(self):
+ # max
+ self.assertEqual(utils.ShardRange.MAX, utils.ShardRange.MAX)
+ self.assertFalse(utils.ShardRange.MAX > utils.ShardRange.MAX)
+ self.assertFalse(utils.ShardRange.MAX < utils.ShardRange.MAX)
+
+ for val in 'z', u'\u00e4':
+ self.assertFalse(utils.ShardRange.MAX == val)
+ self.assertFalse(val > utils.ShardRange.MAX)
+ self.assertTrue(val < utils.ShardRange.MAX)
+ self.assertTrue(utils.ShardRange.MAX > val)
+ self.assertFalse(utils.ShardRange.MAX < val)
+
+ self.assertEqual('', str(utils.ShardRange.MAX))
+ self.assertFalse(utils.ShardRange.MAX)
+ self.assertTrue(utils.ShardRange.MAX == utils.ShardRange.MAX)
+ self.assertFalse(utils.ShardRange.MAX != utils.ShardRange.MAX)
+ self.assertTrue(
+ utils.ShardRange.MaxBound() == utils.ShardRange.MaxBound())
+ self.assertFalse(
+ utils.ShardRange.MaxBound() != utils.ShardRange.MaxBound())
+
+ # min
+ self.assertEqual(utils.ShardRange.MIN, utils.ShardRange.MIN)
+ self.assertFalse(utils.ShardRange.MIN > utils.ShardRange.MIN)
+ self.assertFalse(utils.ShardRange.MIN < utils.ShardRange.MIN)
+
+ for val in 'z', u'\u00e4':
+ self.assertFalse(utils.ShardRange.MIN == val)
+ self.assertFalse(val < utils.ShardRange.MIN)
+ self.assertTrue(val > utils.ShardRange.MIN)
+ self.assertTrue(utils.ShardRange.MIN < val)
+ self.assertFalse(utils.ShardRange.MIN > val)
+ self.assertFalse(utils.ShardRange.MIN)
+
+ self.assertEqual('', str(utils.ShardRange.MIN))
+ self.assertFalse(utils.ShardRange.MIN)
+ self.assertTrue(utils.ShardRange.MIN == utils.ShardRange.MIN)
+ self.assertFalse(utils.ShardRange.MIN != utils.ShardRange.MIN)
+ self.assertTrue(
+ utils.ShardRange.MinBound() == utils.ShardRange.MinBound())
+ self.assertFalse(
+ utils.ShardRange.MinBound() != utils.ShardRange.MinBound())
+
+ self.assertFalse(utils.ShardRange.MAX == utils.ShardRange.MIN)
+ self.assertFalse(utils.ShardRange.MIN == utils.ShardRange.MAX)
+ self.assertTrue(utils.ShardRange.MAX != utils.ShardRange.MIN)
+ self.assertTrue(utils.ShardRange.MIN != utils.ShardRange.MAX)
+
+ self.assertEqual(utils.ShardRange.MAX,
+ max(utils.ShardRange.MIN, utils.ShardRange.MAX))
+ self.assertEqual(utils.ShardRange.MIN,
+ min(utils.ShardRange.MIN, utils.ShardRange.MAX))
+
+ def test_shard_range_initialisation(self):
+ def assert_initialisation_ok(params, expected):
+ pr = utils.ShardRange(**params)
+ self.assertDictEqual(dict(pr), expected)
+
+ def assert_initialisation_fails(params, err_type=ValueError):
+ with self.assertRaises(err_type):
+ utils.ShardRange(**params)
+
+ ts_1 = next(self.ts_iter)
+ ts_2 = next(self.ts_iter)
+ ts_3 = next(self.ts_iter)
+ ts_4 = next(self.ts_iter)
+ empty_run = dict(name=None, timestamp=None, lower=None,
+ upper=None, object_count=0, bytes_used=0,
+ meta_timestamp=None, deleted=0,
+ state=utils.ShardRange.FOUND, state_timestamp=None,
+ epoch=None)
+ # name, timestamp must be given
+ assert_initialisation_fails(empty_run.copy())
+ assert_initialisation_fails(dict(empty_run, name='a/c'), TypeError)
+ assert_initialisation_fails(dict(empty_run, timestamp=ts_1))
+ # name must be of the form a/c
+ assert_initialisation_fails(dict(empty_run, name='c', timestamp=ts_1))
+ assert_initialisation_fails(dict(empty_run, name='', timestamp=ts_1))
+ assert_initialisation_fails(dict(empty_run, name='/a/c',
+ timestamp=ts_1))
+ assert_initialisation_fails(dict(empty_run, name='/c',
+ timestamp=ts_1))
+ # lower, upper can be None
+ expect = dict(name='a/c', timestamp=ts_1.internal, lower='',
+ upper='', object_count=0, bytes_used=0,
+ meta_timestamp=ts_1.internal, deleted=0,
+ state=utils.ShardRange.FOUND,
+ state_timestamp=ts_1.internal, epoch=None)
+ assert_initialisation_ok(dict(empty_run, name='a/c', timestamp=ts_1),
+ expect)
+ assert_initialisation_ok(dict(name='a/c', timestamp=ts_1), expect)
+
+ good_run = dict(name='a/c', timestamp=ts_1, lower='l',
+ upper='u', object_count=2, bytes_used=10,
+ meta_timestamp=ts_2, deleted=0,
+ state=utils.ShardRange.CREATED,
+ state_timestamp=ts_3.internal, epoch=ts_4)
+ expect.update({'lower': 'l', 'upper': 'u', 'object_count': 2,
+ 'bytes_used': 10, 'meta_timestamp': ts_2.internal,
+ 'state': utils.ShardRange.CREATED,
+ 'state_timestamp': ts_3.internal, 'epoch': ts_4})
+ assert_initialisation_ok(good_run.copy(), expect)
+
+ # obj count and bytes used as int strings
+ good_str_run = good_run.copy()
+ good_str_run.update({'object_count': '2', 'bytes_used': '10'})
+ assert_initialisation_ok(good_str_run, expect)
+
+ good_no_meta = good_run.copy()
+ good_no_meta.pop('meta_timestamp')
+ assert_initialisation_ok(good_no_meta,
+ dict(expect, meta_timestamp=ts_1.internal))
+
+ good_deleted = good_run.copy()
+ good_deleted['deleted'] = 1
+ assert_initialisation_ok(good_deleted,
+ dict(expect, deleted=1))
+
+ assert_initialisation_fails(dict(good_run, timestamp='water balloon'))
+
+ assert_initialisation_fails(
+ dict(good_run, meta_timestamp='water balloon'))
+
+ assert_initialisation_fails(dict(good_run, lower='water balloon'))
+
+ assert_initialisation_fails(dict(good_run, upper='balloon'))
+
+ assert_initialisation_fails(
+ dict(good_run, object_count='water balloon'))
+
+ assert_initialisation_fails(dict(good_run, bytes_used='water balloon'))
+
+ assert_initialisation_fails(dict(good_run, object_count=-1))
+
+ assert_initialisation_fails(dict(good_run, bytes_used=-1))
+ assert_initialisation_fails(dict(good_run, state=-1))
+ assert_initialisation_fails(dict(good_run, state_timestamp='not a ts'))
+ assert_initialisation_fails(dict(good_run, name='/a/c'))
+ assert_initialisation_fails(dict(good_run, name='/a/c/'))
+ assert_initialisation_fails(dict(good_run, name='a/c/'))
+ assert_initialisation_fails(dict(good_run, name='a'))
+ assert_initialisation_fails(dict(good_run, name=''))
+
+ def _check_to_from_dict(self, lower, upper):
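+ # round-trip a ShardRange through dict()/from_dict() with the given bounds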
+ ts_1 = next(self.ts_iter)
+ ts_2 = next(self.ts_iter)
+ ts_3 = next(self.ts_iter)
+ ts_4 = next(self.ts_iter)
+ sr = utils.ShardRange('a/test', ts_1, lower, upper, 10, 100, ts_2,
+ state=None, state_timestamp=ts_3, epoch=ts_4)
+ sr_dict = dict(sr)
+ expected = {
+ 'name': 'a/test', 'timestamp': ts_1.internal, 'lower': lower,
+ 'upper': upper, 'object_count': 10, 'bytes_used': 100,
+ 'meta_timestamp': ts_2.internal, 'deleted': 0,
+ 'state': utils.ShardRange.FOUND, 'state_timestamp': ts_3.internal,
+ 'epoch': ts_4}
+ self.assertEqual(expected, sr_dict)
+ self.assertIsInstance(sr_dict['lower'], six.string_types)
+ self.assertIsInstance(sr_dict['upper'], six.string_types)
+ sr_new = utils.ShardRange.from_dict(sr_dict)
+ self.assertEqual(sr, sr_new)
+ self.assertEqual(sr_dict, dict(sr_new))
+
+ sr_new = utils.ShardRange(**sr_dict)
+ self.assertEqual(sr, sr_new)
+ self.assertEqual(sr_dict, dict(sr_new))
+
+ for key in sr_dict:
+ bad_dict = dict(sr_dict)
+ bad_dict.pop(key)
+ with self.assertRaises(KeyError):
+ utils.ShardRange.from_dict(bad_dict)
+ # But __init__ still (generally) works!
+ if key not in ('name', 'timestamp'):
+ utils.ShardRange(**bad_dict)
+ else:
+ with self.assertRaises(TypeError):
+ utils.ShardRange(**bad_dict)
+
+ def test_to_from_dict(self):
+ self._check_to_from_dict('l', 'u')
+ self._check_to_from_dict('', '')
+
+ def test_timestamp_setter(self):
+ ts_1 = next(self.ts_iter)
+ sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None)
+ self.assertEqual(ts_1, sr.timestamp)
+
+ ts_2 = next(self.ts_iter)
+ sr.timestamp = ts_2
+ self.assertEqual(ts_2, sr.timestamp)
+
+ sr.timestamp = 0
+ self.assertEqual(utils.Timestamp(0), sr.timestamp)
+
+ with self.assertRaises(TypeError):
+ sr.timestamp = None
+
+ def test_meta_timestamp_setter(self):
+ ts_1 = next(self.ts_iter)
+ sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None)
+ self.assertEqual(ts_1, sr.timestamp)
+ self.assertEqual(ts_1, sr.meta_timestamp)
+
+ ts_2 = next(self.ts_iter)
+ sr.meta_timestamp = ts_2
+ self.assertEqual(ts_1, sr.timestamp)
+ self.assertEqual(ts_2, sr.meta_timestamp)
+
+ ts_3 = next(self.ts_iter)
+ sr.timestamp = ts_3
+ self.assertEqual(ts_3, sr.timestamp)
+ self.assertEqual(ts_2, sr.meta_timestamp)
+
+ # meta_timestamp defaults to tracking timestamp
+ sr.meta_timestamp = None
+ self.assertEqual(ts_3, sr.timestamp)
+ self.assertEqual(ts_3, sr.meta_timestamp)
+ ts_4 = next(self.ts_iter)
+ sr.timestamp = ts_4
+ self.assertEqual(ts_4, sr.timestamp)
+ self.assertEqual(ts_4, sr.meta_timestamp)
+
+ sr.meta_timestamp = 0
+ self.assertEqual(ts_4, sr.timestamp)
+ self.assertEqual(utils.Timestamp(0), sr.meta_timestamp)
+
+ def test_update_meta(self):
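+ # update_meta() records object_count and bytes_used, defaulting
+ # meta_timestamp to now when none is given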
+ ts_1 = next(self.ts_iter)
+ sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None)
+ with mock_timestamp_now(next(self.ts_iter)) as now:
+ sr.update_meta(9, 99)
+ self.assertEqual(9, sr.object_count)
+ self.assertEqual(99, sr.bytes_used)
+ self.assertEqual(now, sr.meta_timestamp)
+
+ with mock_timestamp_now(next(self.ts_iter)) as now:
+ sr.update_meta(99, 999, None)
+ self.assertEqual(99, sr.object_count)
+ self.assertEqual(999, sr.bytes_used)
+ self.assertEqual(now, sr.meta_timestamp)
+
+ ts_2 = next(self.ts_iter)
+ sr.update_meta(21, 2112, ts_2)
+ self.assertEqual(21, sr.object_count)
+ self.assertEqual(2112, sr.bytes_used)
+ self.assertEqual(ts_2, sr.meta_timestamp)
+
+ sr.update_meta('11', '12')
+ self.assertEqual(11, sr.object_count)
+ self.assertEqual(12, sr.bytes_used)
+
+ def check_bad_args(*args):
+ with self.assertRaises(ValueError):
+ sr.update_meta(*args)
+ check_bad_args('bad', 10)
+ check_bad_args(10, 'bad')
+ check_bad_args(10, 11, 'bad')
+
+ def test_increment_meta(self):
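+ # increment_meta() adds to the existing counts and refreshes meta_timestamp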
+ ts_1 = next(self.ts_iter)
+ sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 1, 2, None)
+ with mock_timestamp_now(next(self.ts_iter)) as now:
+ sr.increment_meta(9, 99)
+ self.assertEqual(10, sr.object_count)
+ self.assertEqual(101, sr.bytes_used)
+ self.assertEqual(now, sr.meta_timestamp)
+
+ sr.increment_meta('11', '12')
+ self.assertEqual(21, sr.object_count)
+ self.assertEqual(113, sr.bytes_used)
+
+ def check_bad_args(*args):
+ with self.assertRaises(ValueError):
+ sr.increment_meta(*args)
+ check_bad_args('bad', 10)
+ check_bad_args(10, 'bad')
+
+ def test_state_timestamp_setter(self):
+ ts_1 = next(self.ts_iter)
+ sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None)
+ self.assertEqual(ts_1, sr.timestamp)
+ self.assertEqual(ts_1, sr.state_timestamp)
+
+ ts_2 = next(self.ts_iter)
+ sr.state_timestamp = ts_2
+ self.assertEqual(ts_1, sr.timestamp)
+ self.assertEqual(ts_2, sr.state_timestamp)
+
+ ts_3 = next(self.ts_iter)
+ sr.timestamp = ts_3
+ self.assertEqual(ts_3, sr.timestamp)
+ self.assertEqual(ts_2, sr.state_timestamp)
+
+ # state_timestamp defaults to tracking timestamp
+ sr.state_timestamp = None
+ self.assertEqual(ts_3, sr.timestamp)
+ self.assertEqual(ts_3, sr.state_timestamp)
+ ts_4 = next(self.ts_iter)
+ sr.timestamp = ts_4
+ self.assertEqual(ts_4, sr.timestamp)
+ self.assertEqual(ts_4, sr.state_timestamp)
+
+ sr.state_timestamp = 0
+ self.assertEqual(ts_4, sr.timestamp)
+ self.assertEqual(utils.Timestamp(0), sr.state_timestamp)
+
+ def test_state_setter(self):
+ for state in utils.ShardRange.STATES:
+ for test_value in (state, str(state)):
+ sr = utils.ShardRange('a/test', next(self.ts_iter), 'l', 'u')
+ sr.state = test_value
+ actual = sr.state
+ self.assertEqual(
+ state, actual,
+ 'Expected %s but got %s for %s' %
+ (state, actual, test_value)
+ )
+
+ for bad_state in (max(utils.ShardRange.STATES) + 1,
+ -1, 99, None, 'stringy', 1.1):
+ sr = utils.ShardRange('a/test', next(self.ts_iter), 'l', 'u')
+ with self.assertRaises(ValueError) as cm:
+ sr.state = bad_state
+ self.assertIn('Invalid state', str(cm.exception))
+
+ def test_update_state(self):
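+ # update_state() is a no-op (returns False) when the state is unchanged
+ # and no new state_timestamp is given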
+ sr = utils.ShardRange('a/c', next(self.ts_iter))
+ old_sr = sr.copy()
+ self.assertEqual(utils.ShardRange.FOUND, sr.state)
+ self.assertEqual(dict(sr), dict(old_sr)) # sanity check
+
+ for state in utils.ShardRange.STATES:
+ if state == utils.ShardRange.FOUND:
+ continue
+ self.assertTrue(sr.update_state(state))
+ self.assertEqual(dict(old_sr, state=state), dict(sr))
+ self.assertFalse(sr.update_state(state))
+ self.assertEqual(dict(old_sr, state=state), dict(sr))
+
+ sr = utils.ShardRange('a/c', next(self.ts_iter))
+ old_sr = sr.copy()
+ for state in utils.ShardRange.STATES:
+ ts = next(self.ts_iter)
+ self.assertTrue(sr.update_state(state, state_timestamp=ts))
+ self.assertEqual(dict(old_sr, state=state, state_timestamp=ts),
+ dict(sr))
+
+ def test_resolve_state(self):
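+ # resolve_state() maps a state name (any case) or number to its
+ # (number, name) pair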
+ for name, number in utils.ShardRange.STATES_BY_NAME.items():
+ self.assertEqual(
+ (number, name), utils.ShardRange.resolve_state(name))
+ self.assertEqual(
+ (number, name), utils.ShardRange.resolve_state(name.upper()))
+ self.assertEqual(
+ (number, name), utils.ShardRange.resolve_state(name.title()))
+ self.assertEqual(
+ (number, name), utils.ShardRange.resolve_state(number))
+
+ def check_bad_value(value):
+ with self.assertRaises(ValueError) as cm:
+ utils.ShardRange.resolve_state(value)
+ self.assertIn('Invalid state %r' % value, str(cm.exception))
+
+ check_bad_value(min(utils.ShardRange.STATES) - 1)
+ check_bad_value(max(utils.ShardRange.STATES) + 1)
+ check_bad_value('badstate')
+
+ def test_epoch_setter(self):
+ sr = utils.ShardRange('a/c', next(self.ts_iter))
+ self.assertIsNone(sr.epoch)
+ ts = next(self.ts_iter)
+ sr.epoch = ts
+ self.assertEqual(ts, sr.epoch)
+ ts = next(self.ts_iter)
+ sr.epoch = ts.internal
+ self.assertEqual(ts, sr.epoch)
+ sr.epoch = None
+ self.assertIsNone(sr.epoch)
+ with self.assertRaises(ValueError):
+ sr.epoch = 'bad'
+
+ def test_deleted_setter(self):
+ sr = utils.ShardRange('a/c', next(self.ts_iter))
+ for val in (True, 1):
+ sr.deleted = val
+ self.assertIs(True, sr.deleted)
+ for val in (False, 0, None):
+ sr.deleted = val
+ self.assertIs(False, sr.deleted)
+
+ def test_set_deleted(self):
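+ # set_deleted() marks the range deleted and bumps its timestamp to now;
+ # repeat calls are no-ops unless an explicit timestamp is passed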
+ sr = utils.ShardRange('a/c', next(self.ts_iter))
+ # initialise other timestamps
+ sr.update_state(utils.ShardRange.ACTIVE,
+ state_timestamp=utils.Timestamp.now())
+ sr.update_meta(1, 2)
+ old_sr = sr.copy()
+ self.assertIs(False, sr.deleted) # sanity check
+ self.assertEqual(dict(sr), dict(old_sr)) # sanity check
+
+ with mock_timestamp_now(next(self.ts_iter)) as now:
+ self.assertTrue(sr.set_deleted())
+ self.assertEqual(now, sr.timestamp)
+ self.assertIs(True, sr.deleted)
+ old_sr_dict = dict(old_sr)
+ old_sr_dict.pop('deleted')
+ old_sr_dict.pop('timestamp')
+ sr_dict = dict(sr)
+ sr_dict.pop('deleted')
+ sr_dict.pop('timestamp')
+ self.assertEqual(old_sr_dict, sr_dict)
+
+ # no change
+ self.assertFalse(sr.set_deleted())
+ self.assertEqual(now, sr.timestamp)
+ self.assertIs(True, sr.deleted)
+
+ # force timestamp change
+ with mock_timestamp_now(next(self.ts_iter)) as now:
+ self.assertTrue(sr.set_deleted(timestamp=now))
+ self.assertEqual(now, sr.timestamp)
+ self.assertIs(True, sr.deleted)
+
+ def test_lower_setter(self):
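+ # empty or None lower collapses to the MIN sentinel; otherwise lower must
+ # be a string no greater than upper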
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', '')
+ # sanity checks
+ self.assertEqual('b', sr.lower)
+ self.assertEqual(sr.MAX, sr.upper)
+
+ def do_test(good_value, expected):
+ sr.lower = good_value
+ self.assertEqual(expected, sr.lower)
+ self.assertEqual(sr.MAX, sr.upper)
+
+ do_test(utils.ShardRange.MIN, utils.ShardRange.MIN)
+ do_test(utils.ShardRange.MAX, utils.ShardRange.MAX)
+ do_test('', utils.ShardRange.MIN)
+ do_test(u'', utils.ShardRange.MIN)
+ do_test(None, utils.ShardRange.MIN)
+ do_test('a', 'a')
+ do_test('y', 'y')
+
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y')
+ sr.lower = ''
+ self.assertEqual(sr.MIN, sr.lower)
+
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y')
+ with self.assertRaises(ValueError) as cm:
+ sr.lower = 'z'
+ self.assertIn("lower ('z') must be less than or equal to upper ('y')",
+ str(cm.exception))
+ self.assertEqual('b', sr.lower)
+ self.assertEqual('y', sr.upper)
+
+ def do_test(bad_value):
+ with self.assertRaises(TypeError) as cm:
+ sr.lower = bad_value
+ self.assertIn("lower must be a string", str(cm.exception))
+ self.assertEqual('b', sr.lower)
+ self.assertEqual('y', sr.upper)
+
+ do_test(1)
+ do_test(1.234)
+
+ def test_upper_setter(self):
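+ # empty or None upper collapses to the MAX sentinel; otherwise upper must
+ # be a string no less than lower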
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', 'y')
+ # sanity checks
+ self.assertEqual(sr.MIN, sr.lower)
+ self.assertEqual('y', sr.upper)
+
+ def do_test(good_value, expected):
+ sr.upper = good_value
+ self.assertEqual(expected, sr.upper)
+ self.assertEqual(sr.MIN, sr.lower)
+
+ do_test(utils.ShardRange.MIN, utils.ShardRange.MIN)
+ do_test(utils.ShardRange.MAX, utils.ShardRange.MAX)
+ do_test('', utils.ShardRange.MAX)
+ do_test(u'', utils.ShardRange.MAX)
+ do_test(None, utils.ShardRange.MAX)
+ do_test('z', 'z')
+ do_test('b', 'b')
+
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y')
+ sr.upper = ''
+ self.assertEqual(sr.MAX, sr.upper)
+
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y')
+ with self.assertRaises(ValueError) as cm:
+ sr.upper = 'a'
+ self.assertIn(
+ "upper ('a') must be greater than or equal to lower ('b')",
+ str(cm.exception))
+ self.assertEqual('b', sr.lower)
+ self.assertEqual('y', sr.upper)
+
+ def do_test(bad_value):
+ with self.assertRaises(TypeError) as cm:
+ sr.upper = bad_value
+ self.assertIn("upper must be a string", str(cm.exception))
+ self.assertEqual('b', sr.lower)
+ self.assertEqual('y', sr.upper)
+
+ do_test(1)
+ do_test(1.234)
+
+ def test_end_marker(self):
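+ # a bounded upper gets a null byte appended; an unbounded range has an
+ # empty end_marker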
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', 'y')
+ self.assertEqual('y\x00', sr.end_marker)
+ sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', '')
+ self.assertEqual('', sr.end_marker)
+
+ def test_bounds_serialization(self):
+ sr = utils.ShardRange('a/c', utils.Timestamp.now())
+ self.assertEqual('a/c', sr.name)
+ self.assertEqual(utils.ShardRange.MIN, sr.lower)
+ self.assertEqual('', sr.lower_str)
+ self.assertEqual(utils.ShardRange.MAX, sr.upper)
+ self.assertEqual('', sr.upper_str)
+ self.assertEqual('', sr.end_marker)
+
+ lower = u'\u00e4'
+ upper = u'\u00fb'
+ sr = utils.ShardRange('a/%s-%s' % (lower, upper),
+ utils.Timestamp.now(), lower, upper)
+ if six.PY3:
+ self.assertEqual(u'\u00e4', sr.lower)
+ self.assertEqual(u'\u00e4', sr.lower_str)
+ self.assertEqual(u'\u00fb', sr.upper)
+ self.assertEqual(u'\u00fb', sr.upper_str)
+ self.assertEqual(u'\u00fb\x00', sr.end_marker)
+ else:
+ self.assertEqual(u'\u00e4'.encode('utf8'), sr.lower)
+ self.assertEqual(u'\u00e4'.encode('utf8'), sr.lower_str)
+ self.assertEqual(u'\u00fb'.encode('utf8'), sr.upper)
+ self.assertEqual(u'\u00fb'.encode('utf8'), sr.upper_str)
+ self.assertEqual(u'\u00fb\x00'.encode('utf8'), sr.end_marker)
+
+ def test_entire_namespace(self):
+ # test entire range (no boundaries)
+ entire = utils.ShardRange('a/test', utils.Timestamp.now())
+ self.assertEqual(utils.ShardRange.MAX, entire.upper)
+ self.assertEqual(utils.ShardRange.MIN, entire.lower)
+ self.assertIs(True, entire.entire_namespace())
+
+ for x in range(100):
+ self.assertTrue(str(x) in entire)
+ self.assertTrue(chr(x) in entire)
+
+ for x in ('a', 'z', 'zzzz', '124fsdf', u'\u00e4'):
+ self.assertTrue(x in entire, '%r should be in %r' % (x, entire))
+
+ entire.lower = 'a'
+ self.assertIs(False, entire.entire_namespace())
+
+ def test_comparisons(self):
+ ts = utils.Timestamp.now().internal
+
+ # upper (if provided) *must* be greater than lower
+ with self.assertRaises(ValueError):
+ utils.ShardRange('f-a', ts, 'f', 'a')
+
+ # test basic boundaries
+ btoc = utils.ShardRange('a/b-c', ts, 'b', 'c')
+ atof = utils.ShardRange('a/a-f', ts, 'a', 'f')
+ ftol = utils.ShardRange('a/f-l', ts, 'f', 'l')
+ ltor = utils.ShardRange('a/l-r', ts, 'l', 'r')
+ rtoz = utils.ShardRange('a/r-z', ts, 'r', 'z')
+ lower = utils.ShardRange('a/lower', ts, '', 'mid')
+ upper = utils.ShardRange('a/upper', ts, 'mid', '')
+ entire = utils.ShardRange('a/test', utils.Timestamp.now())
+
+ # overlapping ranges
+ dtof = utils.ShardRange('a/d-f', ts, 'd', 'f')
+ dtom = utils.ShardRange('a/d-m', ts, 'd', 'm')
+
+ # test range > and <
+ # non-adjacent
+ self.assertFalse(rtoz < atof)
+ self.assertTrue(atof < ltor)
+ self.assertTrue(ltor > atof)
+ self.assertFalse(ftol > rtoz)
+
+ # adjacent
+ self.assertFalse(rtoz < ltor)
+ self.assertTrue(ltor < rtoz)
+ self.assertFalse(ltor > rtoz)
+ self.assertTrue(rtoz > ltor)
+
+ # wholly within
+ self.assertFalse(btoc < atof)
+ self.assertFalse(btoc > atof)
+ self.assertFalse(atof < btoc)
+ self.assertFalse(atof > btoc)
+
+ self.assertFalse(atof < dtof)
+ self.assertFalse(dtof > atof)
+ self.assertFalse(atof > dtof)
+ self.assertFalse(dtof < atof)
+
+ self.assertFalse(dtof < dtom)
+ self.assertFalse(dtof > dtom)
+ self.assertFalse(dtom > dtof)
+ self.assertFalse(dtom < dtof)
+
+ # overlaps
+ self.assertFalse(atof < dtom)
+ self.assertFalse(atof > dtom)
+ self.assertFalse(ltor > dtom)
+
+ # ranges including min/max bounds
+ self.assertTrue(upper > lower)
+ self.assertTrue(lower < upper)
+ self.assertFalse(upper < lower)
+ self.assertFalse(lower > upper)
+
+ self.assertFalse(lower < entire)
+ self.assertFalse(entire > lower)
+ self.assertFalse(lower > entire)
+ self.assertFalse(entire < lower)
+
+ self.assertFalse(upper < entire)
+ self.assertFalse(entire > upper)
+ self.assertFalse(upper > entire)
+ self.assertFalse(entire < upper)
+
+ self.assertFalse(entire < entire)
+ self.assertFalse(entire > entire)
+
+ # test range < and > to an item
+ # range is > lower and <= upper so the lower boundary isn't
+ # actually included
+ self.assertTrue(ftol > 'f')
+ self.assertFalse(atof < 'f')
+ self.assertTrue(ltor < 'y')
+
+ self.assertFalse(ftol < 'f')
+ self.assertFalse(atof > 'f')
+ self.assertFalse(ltor > 'y')
+
+ self.assertTrue('f' < ftol)
+ self.assertFalse('f' > atof)
+ self.assertTrue('y' > ltor)
+
+ self.assertFalse('f' > ftol)
+ self.assertFalse('f' < atof)
+ self.assertFalse('y' < ltor)
+
+ # Now test ranges with only 1 boundary
+ start_to_l = utils.ShardRange('a/None-l', ts, '', 'l')
+ l_to_end = utils.ShardRange('a/l-None', ts, 'l', '')
+
+ for x in ('l', 'm', 'z', 'zzz1231sd'):
+ if x == 'l':
+ self.assertFalse(x in l_to_end)
+ self.assertFalse(start_to_l < x)
+ self.assertFalse(x > start_to_l)
+ else:
+ self.assertTrue(x in l_to_end)
+ self.assertTrue(start_to_l < x)
+ self.assertTrue(x > start_to_l)
+
+ # Now test some of the range to range checks with missing boundaries
+ self.assertFalse(atof < start_to_l)
+ self.assertFalse(start_to_l < entire)
+
+ # Now test ShardRange.overlaps(other)
+ self.assertTrue(atof.overlaps(atof))
+ self.assertFalse(atof.overlaps(ftol))
+ self.assertFalse(ftol.overlaps(atof))
+ self.assertTrue(atof.overlaps(dtof))
+ self.assertTrue(dtof.overlaps(atof))
+ self.assertFalse(dtof.overlaps(ftol))
+ self.assertTrue(dtom.overlaps(ftol))
+ self.assertTrue(ftol.overlaps(dtom))
+ self.assertFalse(start_to_l.overlaps(l_to_end))
+
+ def test_contains(self):
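+ # membership excludes the lower bound and includes the upper bound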
+ ts = utils.Timestamp.now().internal
+ lower = utils.ShardRange('a/-h', ts, '', 'h')
+ mid = utils.ShardRange('a/h-p', ts, 'h', 'p')
+ upper = utils.ShardRange('a/p-', ts, 'p', '')
+ entire = utils.ShardRange('a/all', ts, '', '')
+
+ self.assertTrue('a' in entire)
+ self.assertTrue('x' in entire)
+
+ # the empty string is not a valid object name, so it cannot be in any
+ # range
+ self.assertFalse('' in lower)
+ self.assertFalse('' in upper)
+ self.assertFalse('' in entire)
+
+ self.assertTrue('a' in lower)
+ self.assertTrue('h' in lower)
+ self.assertFalse('i' in lower)
+
+ self.assertFalse('h' in mid)
+ self.assertTrue('p' in mid)
+
+ self.assertFalse('p' in upper)
+ self.assertTrue('x' in upper)
+
+ self.assertIn(utils.ShardRange.MAX, entire)
+ self.assertNotIn(utils.ShardRange.MAX, lower)
+ self.assertIn(utils.ShardRange.MAX, upper)
+
+ # lower bound is excluded so MIN cannot be in any range.
+ self.assertNotIn(utils.ShardRange.MIN, entire)
+ self.assertNotIn(utils.ShardRange.MIN, upper)
+ self.assertNotIn(utils.ShardRange.MIN, lower)
+
+ def test_includes(self):
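+ # includes() is True only when the other range lies entirely within this
+ # range's bounds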
+ ts = utils.Timestamp.now().internal
+ _to_h = utils.ShardRange('a/-h', ts, '', 'h')
+ d_to_t = utils.ShardRange('a/d-t', ts, 'd', 't')
+ d_to_k = utils.ShardRange('a/d-k', ts, 'd', 'k')
+ e_to_l = utils.ShardRange('a/e-l', ts, 'e', 'l')
+ k_to_t = utils.ShardRange('a/k-t', ts, 'k', 't')
+ p_to_ = utils.ShardRange('a/p-', ts, 'p', '')
+ t_to_ = utils.ShardRange('a/t-', ts, 't', '')
+ entire = utils.ShardRange('a/all', ts, '', '')
+
+ self.assertTrue(entire.includes(entire))
+ self.assertTrue(d_to_t.includes(d_to_t))
+ self.assertTrue(_to_h.includes(_to_h))
+ self.assertTrue(p_to_.includes(p_to_))
+
+ self.assertTrue(entire.includes(_to_h))
+ self.assertTrue(entire.includes(d_to_t))
+ self.assertTrue(entire.includes(p_to_))
+
+ self.assertTrue(d_to_t.includes(d_to_k))
+ self.assertTrue(d_to_t.includes(e_to_l))
+ self.assertTrue(d_to_t.includes(k_to_t))
+ self.assertTrue(p_to_.includes(t_to_))
+
+ self.assertFalse(_to_h.includes(d_to_t))
+ self.assertFalse(p_to_.includes(d_to_t))
+ self.assertFalse(k_to_t.includes(d_to_k))
+ self.assertFalse(d_to_k.includes(e_to_l))
+ self.assertFalse(k_to_t.includes(e_to_l))
+ self.assertFalse(t_to_.includes(p_to_))
+
+ self.assertFalse(_to_h.includes(entire))
+ self.assertFalse(p_to_.includes(entire))
+ self.assertFalse(d_to_t.includes(entire))
+
+ def test_repr(self):
+ ts = next(self.ts_iter)
+ ts.offset = 1234
+ meta_ts = next(self.ts_iter)
+ state_ts = next(self.ts_iter)
+ sr = utils.ShardRange('a/c', ts, 'l', 'u', 100, 1000,
+ meta_timestamp=meta_ts,
+ state=utils.ShardRange.ACTIVE,
+ state_timestamp=state_ts)
+ self.assertEqual(
+ "ShardRange<'l' to 'u' as of %s, (100, 1000) as of %s, "
+ "active as of %s>"
+ % (ts.internal, meta_ts.internal, state_ts.internal), str(sr))
+
+ ts.offset = 0
+ meta_ts.offset = 2
+ state_ts.offset = 3
+ sr = utils.ShardRange('a/c', ts, '', '', 100, 1000,
+ meta_timestamp=meta_ts,
+ state=utils.ShardRange.FOUND,
+ state_timestamp=state_ts)
+ self.assertEqual(
+ "ShardRange"
+ % (ts.internal, meta_ts.internal, state_ts.internal), str(sr))
+
+ def test_copy(self):
+ sr = utils.ShardRange('a/c', next(self.ts_iter), 'x', 'y', 99, 99000,
+ meta_timestamp=next(self.ts_iter),
+ state=utils.ShardRange.CREATED,
+ state_timestamp=next(self.ts_iter))
+ new = sr.copy()
+ self.assertEqual(dict(sr), dict(new))
+
+ new = sr.copy(deleted=1)
+ self.assertEqual(dict(sr, deleted=1), dict(new))
+
+ new_timestamp = next(self.ts_iter)
+ new = sr.copy(timestamp=new_timestamp)
+ self.assertEqual(dict(sr, timestamp=new_timestamp.internal,
+ meta_timestamp=new_timestamp.internal,
+ state_timestamp=new_timestamp.internal),
+ dict(new))
+
+ new = sr.copy(timestamp=new_timestamp, object_count=99)
+ self.assertEqual(dict(sr, timestamp=new_timestamp.internal,
+ meta_timestamp=new_timestamp.internal,
+ state_timestamp=new_timestamp.internal,
+ object_count=99),
+ dict(new))
+
+ def test_make_path(self):
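+ # make_path() composes 'account/root-hash-timestamp-index' where hash is
+ # the md5 of the parent container name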
+ ts = utils.Timestamp.now()
+ actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 0)
+ parent_hash = hashlib.md5(b'parent').hexdigest()
+ self.assertEqual('a/root-%s-%s-0' % (parent_hash, ts.internal), actual)
+ actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 3)
+ self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual)
+ actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, '3')
+ self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual)
+ actual = utils.ShardRange.make_path(
+ 'a', 'root', 'parent', ts.internal, '3')
+ self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual)
+ actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 'foo')
+ self.assertEqual('a/root-%s-%s-foo' % (parent_hash, ts.internal),
+ actual)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/unit/common/test_wsgi.py b/test/unit/common/test_wsgi.py
index 774fcf84e8..8e88d09b59 100644
--- a/test/unit/common/test_wsgi.py
+++ b/test/unit/common/test_wsgi.py
@@ -1270,9 +1270,10 @@ class TestWorkersStrategy(unittest.TestCase):
pid += 1
sock_count += 1
+ mypid = os.getpid()
self.assertEqual([
- 'Started child %s' % 88,
- 'Started child %s' % 89,
+ 'Started child %s from parent %s' % (88, mypid),
+ 'Started child %s from parent %s' % (89, mypid),
], self.logger.get_lines_for_level('notice'))
self.assertEqual(2, sock_count)
@@ -1282,7 +1283,7 @@ class TestWorkersStrategy(unittest.TestCase):
self.strategy.register_worker_exit(88)
self.assertEqual([
- 'Removing dead child %s' % 88,
+ 'Removing dead child %s from parent %s' % (88, mypid),
], self.logger.get_lines_for_level('error'))
for s, i in self.strategy.new_worker_socks():
@@ -1294,9 +1295,9 @@ class TestWorkersStrategy(unittest.TestCase):
self.assertEqual(1, sock_count)
self.assertEqual([
- 'Started child %s' % 88,
- 'Started child %s' % 89,
- 'Started child %s' % 90,
+ 'Started child %s from parent %s' % (88, mypid),
+ 'Started child %s from parent %s' % (89, mypid),
+ 'Started child %s from parent %s' % (90, mypid),
], self.logger.get_lines_for_level('notice'))
def test_post_fork_hook(self):
diff --git a/test/unit/container/test_backend.py b/test/unit/container/test_backend.py
index 1febf47cfb..79ede02901 100644
--- a/test/unit/container/test_backend.py
+++ b/test/unit/container/test_backend.py
@@ -14,13 +14,13 @@
# limitations under the License.
""" Tests for swift.container.backend """
-
+import errno
import os
import hashlib
+import inspect
import unittest
from time import sleep, time
from uuid import uuid4
-import itertools
import random
from collections import defaultdict
from contextlib import contextmanager
@@ -28,38 +28,69 @@ import sqlite3
import pickle
import json
+from swift.common.exceptions import LockTimeout
from swift.container.backend import ContainerBroker, \
- update_new_item_from_existing
-from swift.common.utils import Timestamp, encode_timestamps
+ update_new_item_from_existing, UNSHARDED, SHARDING, SHARDED, \
+ COLLAPSED, SHARD_LISTING_STATES, SHARD_UPDATE_STATES
+from swift.common.db import DatabaseAlreadyExists, GreenDBConnection
+from swift.common.utils import Timestamp, encode_timestamps, hash_path, \
+ ShardRange, make_db_file_path
from swift.common.storage_policy import POLICIES
import mock
+from test import annotate_failure
from test.unit import (patch_policies, with_tempdir, make_timestamp_iter,
- EMPTY_ETAG)
+ EMPTY_ETAG, FakeLogger, mock_timestamp_now)
from test.unit.common import test_db
class TestContainerBroker(unittest.TestCase):
"""Tests for ContainerBroker"""
+ expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object',
+ 'sqlite_sequence', 'policy_stat',
+ 'container_info', 'shard_range'}
+
+ def _assert_shard_ranges(self, broker, expected, include_own=False):
+ actual = broker.get_shard_ranges(include_deleted=True,
+ include_own=include_own)
+ self.assertEqual([dict(sr) for sr in expected],
+ [dict(sr) for sr in actual])
def test_creation(self):
# Test ContainerBroker.__init__
broker = ContainerBroker(':memory:', account='a', container='c')
- self.assertEqual(broker.db_file, ':memory:')
+ self.assertEqual(broker._db_file, ':memory:')
broker.initialize(Timestamp('1').internal, 0)
with broker.get() as conn:
curs = conn.cursor()
curs.execute('SELECT 1')
self.assertEqual(curs.fetchall()[0][0], 1)
+ curs.execute("SELECT name FROM sqlite_master WHERE type='table';")
+ self.assertEqual(self.expected_db_tables,
+ {row[0] for row in curs.fetchall()})
+ # check the update trigger
+ broker.put_object('blah', Timestamp.now().internal, 0, 'text/plain',
+ 'etag', 0, 0)
+ with broker.get() as conn:
+ with self.assertRaises(sqlite3.DatabaseError) as cm:
+ conn.execute('UPDATE object SET name="blah";')
+ self.assertIn('UPDATE not allowed', str(cm.exception))
+ if 'shard_range' in self.expected_db_tables:
+ # check the update trigger
+ broker.merge_shard_ranges(broker.get_own_shard_range())
+ with broker.get() as conn:
+ with self.assertRaises(sqlite3.DatabaseError) as cm:
+ conn.execute('UPDATE shard_range SET name="blah";')
+ self.assertIn('UPDATE not allowed', str(cm.exception))
@patch_policies
def test_storage_policy_property(self):
- ts = (Timestamp(t).internal for t in itertools.count(int(time())))
+ ts = make_timestamp_iter()
for policy in POLICIES:
broker = ContainerBroker(':memory:', account='a',
container='policy_%s' % policy.name)
- broker.initialize(next(ts), policy.idx)
+ broker.initialize(next(ts).internal, policy.idx)
with broker.get() as conn:
try:
conn.execute('''SELECT storage_policy_index
@@ -92,16 +123,296 @@ class TestContainerBroker(unittest.TestCase):
pass
self.assertTrue(broker.conn is None)
- def test_empty(self):
+ @with_tempdir
+ def test_is_deleted(self, tempdir):
+ # Test ContainerBroker.is_deleted() and get_info_is_deleted()
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+
+ self.assertFalse(broker.is_deleted())
+ broker.delete_db(next(ts_iter).internal)
+ self.assertTrue(broker.is_deleted())
+
+ def check_object_counted(broker_to_test, broker_with_object):
+ obj = {'name': 'o', 'created_at': next(ts_iter).internal,
+ 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG,
+ 'deleted': 0}
+ broker_with_object.merge_items([dict(obj)])
+ self.assertFalse(broker_to_test.is_deleted())
+ info, deleted = broker_to_test.get_info_is_deleted()
+ self.assertFalse(deleted)
+ self.assertEqual(1, info['object_count'])
+ obj.update({'created_at': next(ts_iter).internal, 'deleted': 1})
+ broker_with_object.merge_items([dict(obj)])
+ self.assertTrue(broker_to_test.is_deleted())
+ info, deleted = broker_to_test.get_info_is_deleted()
+ self.assertTrue(deleted)
+ self.assertEqual(0, info['object_count'])
+
+ def check_object_not_counted(broker):
+ obj = {'name': 'o', 'created_at': next(ts_iter).internal,
+ 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG,
+ 'deleted': 0}
+ broker.merge_items([dict(obj)])
+ self.assertTrue(broker.is_deleted())
+ info, deleted = broker.get_info_is_deleted()
+ self.assertTrue(deleted)
+ self.assertEqual(0, info['object_count'])
+ obj.update({'created_at': next(ts_iter).internal, 'deleted': 1})
+ broker.merge_items([dict(obj)])
+ self.assertTrue(broker.is_deleted())
+ info, deleted = broker.get_info_is_deleted()
+ self.assertTrue(deleted)
+ self.assertEqual(0, info['object_count'])
+
+ def check_shard_ranges_not_counted():
+ sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0)
+ sr.update_meta(13, 99, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.is_deleted())
+ info, deleted = broker.get_info_is_deleted()
+ self.assertTrue(deleted)
+ self.assertEqual(0, info['object_count'])
+
+ def check_shard_ranges_counted():
+ sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0)
+ sr.update_meta(13, 99, meta_timestamp=next(ts_iter))
+ counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING)
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ expected = state not in counted_states
+ self.assertEqual(expected, broker.is_deleted())
+ info, deleted = broker.get_info_is_deleted()
+ self.assertEqual(expected, deleted)
+ self.assertEqual(0 if expected else 13, info['object_count'])
+
+ sr.update_meta(0, 0, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.is_deleted())
+ info, deleted = broker.get_info_is_deleted()
+ self.assertTrue(deleted)
+ self.assertEqual(0, info['object_count'])
+
+ # unsharded
+ check_object_counted(broker, broker)
+ check_shard_ranges_not_counted()
+
+ # move to sharding state
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ broker.delete_db(next(ts_iter).internal)
+ self.assertTrue(broker.is_deleted())
+
+ # check object in retiring db is considered
+ check_object_counted(broker, broker.get_brokers()[0])
+ self.assertTrue(broker.is_deleted())
+ check_shard_ranges_not_counted()
+ # misplaced object in fresh db is not considered
+ check_object_not_counted(broker)
+
+ # move to sharded state
+ self.assertTrue(broker.set_sharded_state())
+ check_object_not_counted(broker)
+ check_shard_ranges_counted()
+
+ # own shard range has no influence
+ own_sr = broker.get_own_shard_range()
+ own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([own_sr])
+ self.assertTrue(broker.is_deleted())
+
+ @with_tempdir
+ def test_empty(self, tempdir):
# Test ContainerBroker.empty
- broker = ContainerBroker(':memory:', account='a', container='c')
- broker.initialize(Timestamp('1').internal, 0)
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+ self.assertTrue(broker.is_root_container())
+
+ def check_object_counted(broker_to_test, broker_with_object):
+ obj = {'name': 'o', 'created_at': next(ts_iter).internal,
+ 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG,
+ 'deleted': 0}
+ broker_with_object.merge_items([dict(obj)])
+ self.assertFalse(broker_to_test.empty())
+ # and delete it
+ obj.update({'created_at': next(ts_iter).internal, 'deleted': 1})
+ broker_with_object.merge_items([dict(obj)])
+ self.assertTrue(broker_to_test.empty())
+
+ def check_shard_ranges_not_counted():
+ sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0)
+ sr.update_meta(13, 99, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.empty())
+
+ # empty other shard ranges do not influence result
+ sr.update_meta(0, 0, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.empty())
+
self.assertTrue(broker.empty())
- broker.put_object('o', Timestamp.now().internal, 0, 'text/plain',
- 'd41d8cd98f00b204e9800998ecf8427e')
- self.assertTrue(not broker.empty())
- sleep(.00001)
- broker.delete_object('o', Timestamp.now().internal)
+ check_object_counted(broker, broker)
+ check_shard_ranges_not_counted()
+
+ # own shard range is not considered for object count
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(0, own_sr.object_count)
+ broker.merge_shard_ranges([own_sr])
+ self.assertTrue(broker.empty())
+
+ broker.put_object('o', next(ts_iter).internal, 0, 'text/plain',
+ EMPTY_ETAG)
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(1, own_sr.object_count)
+ broker.merge_shard_ranges([own_sr])
+ self.assertFalse(broker.empty())
+ broker.delete_object('o', next(ts_iter).internal)
+ self.assertTrue(broker.empty())
+
+ # have own shard range but in state ACTIVE
+ self.assertEqual(ShardRange.ACTIVE, own_sr.state)
+ check_object_counted(broker, broker)
+ check_shard_ranges_not_counted()
+
+ def check_shard_ranges_counted():
+ # other shard range is considered
+ sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0)
+ sr.update_meta(13, 99, meta_timestamp=next(ts_iter))
+ counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING)
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertEqual(state not in counted_states, broker.empty())
+
+ # empty other shard ranges do not influence result
+ sr.update_meta(0, 0, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.empty())
+
+ # enable sharding
+ broker.enable_sharding(next(ts_iter))
+ check_object_counted(broker, broker)
+ check_shard_ranges_counted()
+
+ # move to sharding state
+ self.assertTrue(broker.set_sharding_state())
+ # check object in retiring db is considered
+ check_object_counted(broker, broker.get_brokers()[0])
+ self.assertTrue(broker.empty())
+ # as well as misplaced objects in fresh db
+ check_object_counted(broker, broker)
+ check_shard_ranges_counted()
+
+ # move to sharded state
+ self.assertTrue(broker.set_sharded_state())
+ self.assertTrue(broker.empty())
+ check_object_counted(broker, broker)
+ check_shard_ranges_counted()
+
+ # own shard range still has no influence
+ own_sr = broker.get_own_shard_range()
+ own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([own_sr])
+ self.assertTrue(broker.empty())
+
+ @with_tempdir
+ def test_empty_shard_container(self, tempdir):
+ # Test ContainerBroker.empty for a shard container where shard range
+ # usage should not be considered
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(db_path, account='.shards_a', container='cc')
+ broker.initialize(next(ts_iter).internal, 0)
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ self.assertFalse(broker.is_root_container())
+
+ def check_object_counted(broker_to_test, broker_with_object):
+ obj = {'name': 'o', 'created_at': next(ts_iter).internal,
+ 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG,
+ 'deleted': 0}
+ broker_with_object.merge_items([dict(obj)])
+ self.assertFalse(broker_to_test.empty())
+ # and delete it
+ obj.update({'created_at': next(ts_iter).internal, 'deleted': 1})
+ broker_with_object.merge_items([dict(obj)])
+ self.assertTrue(broker_to_test.empty())
+
+ self.assertTrue(broker.empty())
+ check_object_counted(broker, broker)
+
+ # own shard range is not considered for object count
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(0, own_sr.object_count)
+ broker.merge_shard_ranges([own_sr])
+ self.assertTrue(broker.empty())
+
+ broker.put_object('o', next(ts_iter).internal, 0, 'text/plain',
+ EMPTY_ETAG)
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(1, own_sr.object_count)
+ broker.merge_shard_ranges([own_sr])
+ self.assertFalse(broker.empty())
+ broker.delete_object('o', next(ts_iter).internal)
+ self.assertTrue(broker.empty())
+
+ def check_shard_ranges_not_counted():
+ sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0)
+ sr.update_meta(13, 99, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.empty())
+
+ # empty other shard ranges do not influence result
+ sr.update_meta(0, 0, meta_timestamp=next(ts_iter))
+ for state in ShardRange.STATES:
+ sr.update_state(state, state_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([sr])
+ self.assertTrue(broker.empty())
+
+ check_shard_ranges_not_counted()
+
+ # move to sharding state
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+
+ # check object in retiring db is considered
+ check_object_counted(broker, broker.get_brokers()[0])
+ self.assertTrue(broker.empty())
+ # as well as misplaced objects in fresh db
+ check_object_counted(broker, broker)
+ check_shard_ranges_not_counted()
+
+ # move to sharded state
+ self.assertTrue(broker.set_sharded_state())
+ self.assertTrue(broker.empty())
+ check_object_counted(broker, broker)
+ check_shard_ranges_not_counted()
+
+ # own shard range still has no influence
+ own_sr = broker.get_own_shard_range()
+ own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter))
+ broker.merge_shard_ranges([own_sr])
self.assertTrue(broker.empty())
def test_reclaim(self):
@@ -164,48 +475,120 @@ class TestContainerBroker(unittest.TestCase):
broker.reclaim(Timestamp.now().internal, time())
broker.delete_db(Timestamp.now().internal)
+ @with_tempdir
+ def test_reclaim_deadlock(self, tempdir):
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4())
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(Timestamp(100).internal, 0)
+ # there's some magic count here that causes the failure, something
+ # about the size of object records and sqlite page size maybe?
+ count = 23000
+ for i in range(count):
+ obj_name = 'o%d' % i
+ ts = Timestamp(200).internal
+ broker.delete_object(obj_name, ts)
+ broker._commit_puts()
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT count(*) FROM object").fetchone()[0], count)
+ # make a broker whose container attribute is not yet set so that
+ # reclaim will need to query info to set it
+ broker = ContainerBroker(db_path, timeout=1)
+ # verify that reclaim doesn't get deadlocked and timeout
+ broker.reclaim(300, 300)
+ # check all objects were reclaimed
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT count(*) FROM object"
+ ).fetchone()[0], 0)
+
+ @with_tempdir
+ def test_reclaim_shard_ranges(self, tempdir):
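+ # deleted shard ranges are reclaimed once they are older than the reclaim
+ # threshold, but the broker's own shard range never is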
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4())
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+ older = next(ts_iter)
+ same = next(ts_iter)
+ newer = next(ts_iter)
+ shard_ranges = [
+ ShardRange('.shards_a/older_deleted', older.internal, '', 'a',
+ deleted=True),
+ ShardRange('.shards_a/same_deleted', same.internal, 'a', 'b',
+ deleted=True),
+ ShardRange('.shards_a/newer_deleted', newer.internal, 'b', 'c',
+ deleted=True),
+ ShardRange('.shards_a/older', older.internal, 'c', 'd'),
+ ShardRange('.shards_a/same', same.internal, 'd', 'e'),
+ ShardRange('.shards_a/newer', newer.internal, 'e', 'f'),
+ # own shard range is never reclaimed, even if deleted
+ ShardRange('a/c', older.internal, '', '', deleted=True)]
+ broker.merge_shard_ranges(
+ random.sample(shard_ranges, len(shard_ranges)))
+
+ def assert_row_count(expected):
+ with broker.get() as conn:
+ res = conn.execute("SELECT count(*) FROM shard_range")
+ self.assertEqual(expected, res.fetchone()[0])
+
+ broker.reclaim(older.internal, older.internal)
+ assert_row_count(7)
+ self._assert_shard_ranges(broker, shard_ranges, include_own=True)
+ broker.reclaim(older.internal, same.internal)
+ assert_row_count(6)
+ self._assert_shard_ranges(broker, shard_ranges[1:], include_own=True)
+ broker.reclaim(older.internal, newer.internal)
+ assert_row_count(5)
+ self._assert_shard_ranges(broker, shard_ranges[2:], include_own=True)
+ broker.reclaim(older.internal, next(ts_iter).internal)
+ assert_row_count(4)
+ self._assert_shard_ranges(broker, shard_ranges[3:], include_own=True)
+
def test_get_info_is_deleted(self):
- start = int(time())
- ts = (Timestamp(t).internal for t in itertools.count(start))
+ ts = make_timestamp_iter()
+ start = next(ts)
broker = ContainerBroker(':memory:', account='test_account',
container='test_container')
# create it
- broker.initialize(next(ts), POLICIES.default.idx)
+ broker.initialize(start.internal, POLICIES.default.idx)
info, is_deleted = broker.get_info_is_deleted()
self.assertEqual(is_deleted, broker.is_deleted())
self.assertEqual(is_deleted, False) # sanity
self.assertEqual(info, broker.get_info())
- self.assertEqual(info['put_timestamp'], Timestamp(start).internal)
+ self.assertEqual(info['put_timestamp'], start.internal)
self.assertTrue(Timestamp(info['created_at']) >= start)
self.assertEqual(info['delete_timestamp'], '0')
if self.__class__ in (TestContainerBrokerBeforeMetadata,
TestContainerBrokerBeforeXSync,
- TestContainerBrokerBeforeSPI):
+ TestContainerBrokerBeforeSPI,
+ TestContainerBrokerBeforeShardRanges):
self.assertEqual(info['status_changed_at'], '0')
else:
self.assertEqual(info['status_changed_at'],
- Timestamp(start).internal)
+ start.internal)
# delete it
delete_timestamp = next(ts)
- broker.delete_db(delete_timestamp)
+ broker.delete_db(delete_timestamp.internal)
info, is_deleted = broker.get_info_is_deleted()
self.assertEqual(is_deleted, True) # sanity
self.assertEqual(is_deleted, broker.is_deleted())
self.assertEqual(info, broker.get_info())
- self.assertEqual(info['put_timestamp'], Timestamp(start).internal)
+ self.assertEqual(info['put_timestamp'], start.internal)
self.assertTrue(Timestamp(info['created_at']) >= start)
self.assertEqual(info['delete_timestamp'], delete_timestamp)
self.assertEqual(info['status_changed_at'], delete_timestamp)
# bring back to life
- broker.put_object('obj', next(ts), 0, 'text/plain', 'etag',
+ broker.put_object('obj', next(ts).internal, 0, 'text/plain', 'etag',
storage_policy_index=broker.storage_policy_index)
info, is_deleted = broker.get_info_is_deleted()
self.assertEqual(is_deleted, False) # sanity
self.assertEqual(is_deleted, broker.is_deleted())
self.assertEqual(info, broker.get_info())
- self.assertEqual(info['put_timestamp'], Timestamp(start).internal)
+ self.assertEqual(info['put_timestamp'], start.internal)
self.assertTrue(Timestamp(info['created_at']) >= start)
self.assertEqual(info['delete_timestamp'], delete_timestamp)
self.assertEqual(info['status_changed_at'], delete_timestamp)
@@ -432,6 +815,273 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(conn.execute(
"SELECT deleted FROM object").fetchone()[0], 0)
+ def test_merge_shard_range_single_record(self):
+ # Test ContainerBroker.merge_shard_range
+ broker = ContainerBroker(':memory:', account='a', container='c')
+ broker.initialize(Timestamp('1').internal, 0)
+
+ ts_iter = make_timestamp_iter()
+ # Stash these for later
+ old_put_timestamp = next(ts_iter).internal
+ old_delete_timestamp = next(ts_iter).internal
+
+ # Create initial object
+ timestamp = next(ts_iter).internal
+ meta_timestamp = next(ts_iter).internal
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'low', 'up', meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'low')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'up')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 0)
+
+ # Reput same event
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'low', 'up', meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'low')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'up')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 0)
+
+ # Put new event
+ timestamp = next(ts_iter).internal
+ meta_timestamp = next(ts_iter).internal
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'lower', 'upper', 1, 2, meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'lower')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'upper')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 1)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 2)
+
+ # Put old event
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', old_put_timestamp,
+ 'lower', 'upper', 1, 2, meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp) # Not old_put_timestamp!
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'lower')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'upper')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 1)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 2)
+
+ # Put old delete event
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', old_delete_timestamp,
+ 'lower', 'upper', meta_timestamp=meta_timestamp,
+ deleted=1))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp) # Not old_delete_timestamp!
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'lower')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'upper')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 1)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 2)
+
+ # Put new delete event
+ timestamp = next(ts_iter).internal
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'lower', 'upper', meta_timestamp=meta_timestamp,
+ deleted=1))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 1)
+
+ # Put new event
+ timestamp = next(ts_iter).internal
+ meta_timestamp = next(ts_iter).internal
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'lowerer', 'upperer', 3, 4,
+ meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'lowerer')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'upperer')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 3)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 4)
+
+ # We'll use this later
+ in_between_timestamp = next(ts_iter).internal
+
+ # New update event, meta_timestamp increases
+ meta_timestamp = next(ts_iter).internal
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'lowerer', 'upperer', 3, 4,
+ meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'lowerer')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'upperer')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 3)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 4)
+
+ # Put event from after last put but before last post
+ timestamp = in_between_timestamp
+ broker.merge_shard_ranges(
+ ShardRange('"a/{}"', timestamp,
+ 'lowererer', 'uppererer', 5, 6,
+ meta_timestamp=meta_timestamp))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT name FROM shard_range").fetchone()[0],
+ '"a/{}"')
+ self.assertEqual(conn.execute(
+ "SELECT timestamp FROM shard_range").fetchone()[0],
+ timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT meta_timestamp FROM shard_range").fetchone()[0],
+ meta_timestamp)
+ self.assertEqual(conn.execute(
+ "SELECT lower FROM shard_range").fetchone()[0], 'lowererer')
+ self.assertEqual(conn.execute(
+ "SELECT upper FROM shard_range").fetchone()[0], 'uppererer')
+ self.assertEqual(conn.execute(
+ "SELECT deleted FROM shard_range").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT object_count FROM shard_range").fetchone()[0], 5)
+ self.assertEqual(conn.execute(
+ "SELECT bytes_used FROM shard_range").fetchone()[0], 6)
+
+ def test_merge_shard_ranges_deleted(self):
+ # Test ContainerBroker.merge_shard_ranges sets deleted attribute
+ ts_iter = make_timestamp_iter()
+ broker = ContainerBroker(':memory:', account='a', container='c')
+ broker.initialize(Timestamp('1').internal, 0)
+ # put shard range
+ broker.merge_shard_ranges(ShardRange('a/o', next(ts_iter).internal))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT count(*) FROM shard_range "
+ "WHERE deleted = 0").fetchone()[0], 1)
+ self.assertEqual(conn.execute(
+ "SELECT count(*) FROM shard_range "
+ "WHERE deleted = 1").fetchone()[0], 0)
+
+ # delete shard range
+ broker.merge_shard_ranges(ShardRange('a/o', next(ts_iter).internal,
+ deleted=1))
+ with broker.get() as conn:
+ self.assertEqual(conn.execute(
+ "SELECT count(*) FROM shard_range "
+ "WHERE deleted = 0").fetchone()[0], 0)
+ self.assertEqual(conn.execute(
+ "SELECT count(*) FROM shard_range "
+ "WHERE deleted = 1").fetchone()[0], 1)
+
def test_make_tuple_for_pickle(self):
record = {'name': 'obj',
'created_at': '1234567890.12345',
@@ -559,7 +1209,7 @@ class TestContainerBroker(unittest.TestCase):
"SELECT deleted FROM object").fetchone()[0], deleted)
def _test_put_object_multiple_encoded_timestamps(self, broker):
- ts = (Timestamp(t) for t in itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker.initialize(next(ts).internal, 0)
t = [next(ts) for _ in range(9)]
@@ -619,6 +1269,194 @@ class TestContainerBroker(unittest.TestCase):
broker = ContainerBroker(':memory:', account='a', container='c')
self._test_put_object_multiple_encoded_timestamps(broker)
+ @with_tempdir
+ def test_get_db_state(self, tempdir):
+ acct = 'account'
+ cont = 'container'
+ hsh = hash_path(acct, cont)
+ db_file = "%s.db" % hsh
+ epoch = Timestamp.now()
+ fresh_db_file = "%s_%s.db" % (hsh, epoch.normal)
+ db_path = os.path.join(tempdir, db_file)
+ fresh_db_path = os.path.join(tempdir, fresh_db_file)
+ ts = Timestamp.now()
+
+ # First test NOTFOUND state
+ broker = ContainerBroker(db_path, account=acct, container=cont)
+ self.assertEqual(broker.get_db_state(), 'not_found')
+
+ # Test UNSHARDED state, that is when db_file exists and fresh_db_file
+ # doesn't
+ broker.initialize(ts.internal, 0)
+ self.assertEqual(broker.get_db_state(), 'unsharded')
+
+ # Test the SHARDING state, i.e. the period when both db_file and
+ # fresh_db_file exist
+ fresh_broker = ContainerBroker(fresh_db_path, account=acct,
+ container=cont, force_db_file=True)
+ fresh_broker.initialize(ts.internal, 0)
+ own_shard_range = fresh_broker.get_own_shard_range()
+ own_shard_range.update_state(ShardRange.SHARDING)
+ own_shard_range.epoch = epoch
+ shard_range = ShardRange(
+ '.shards_%s/%s' % (acct, cont), Timestamp.now())
+ fresh_broker.merge_shard_ranges([own_shard_range, shard_range])
+
+ self.assertEqual(fresh_broker.get_db_state(), 'sharding')
+ # old broker will also change state if we reload its db files
+ broker.reload_db_files()
+ self.assertEqual(broker.get_db_state(), 'sharding')
+
+ # Test the SHARDED state, i.e. when only fresh_db_file exists.
+ os.unlink(db_path)
+ fresh_broker.reload_db_files()
+ self.assertEqual(fresh_broker.get_db_state(), 'sharded')
+
+ # Test the COLLAPSED state: only fresh_db_file exists and all the other
+ # shard ranges have been deleted.
+ shard_range.deleted = 1
+ shard_range.timestamp = Timestamp.now()
+ fresh_broker.merge_shard_ranges([shard_range])
+ self.assertEqual(fresh_broker.get_db_state(), 'collapsed')
+
+ # back to UNSHARDED if the desired epoch changes
+ own_shard_range.update_state(ShardRange.SHRINKING,
+ state_timestamp=Timestamp.now())
+ own_shard_range.epoch = Timestamp.now()
+ fresh_broker.merge_shard_ranges([own_shard_range])
+ self.assertEqual(fresh_broker.get_db_state(), 'unsharded')
+
+ @with_tempdir
+ def test_db_file(self, tempdir):
+ acct = 'account'
+ cont = 'container'
+ hsh = hash_path(acct, cont)
+ db_file = "%s.db" % hsh
+ ts_epoch = Timestamp.now()
+ fresh_db_file = "%s_%s.db" % (hsh, ts_epoch.normal)
+ db_path = os.path.join(tempdir, db_file)
+ fresh_db_path = os.path.join(tempdir, fresh_db_file)
+ ts = Timestamp.now()
+
+ # First test the NOTFOUND state: db_file should return whatever path was
+ # passed to the constructor
+ def check_unfound_db_files(broker, init_db_file):
+ self.assertEqual(init_db_file, broker.db_file)
+ self.assertEqual(broker._db_file, db_path)
+ self.assertFalse(os.path.exists(db_path))
+ self.assertFalse(os.path.exists(fresh_db_path))
+ self.assertEqual([], broker.db_files)
+
+ broker = ContainerBroker(db_path, account=acct, container=cont)
+ check_unfound_db_files(broker, db_path)
+ broker = ContainerBroker(fresh_db_path, account=acct, container=cont)
+ check_unfound_db_files(broker, fresh_db_path)
+
+ # Test UNSHARDED state, that is when db_file exists and fresh_db_file
+ # doesn't, so it should return the db_path
+ def check_unsharded_db_files(broker):
+ self.assertEqual(broker.db_file, db_path)
+ self.assertEqual(broker._db_file, db_path)
+ self.assertTrue(os.path.exists(db_path))
+ self.assertFalse(os.path.exists(fresh_db_path))
+ self.assertEqual([db_path], broker.db_files)
+
+ broker = ContainerBroker(db_path, account=acct, container=cont)
+ broker.initialize(ts.internal, 0)
+ check_unsharded_db_files(broker)
+ broker = ContainerBroker(fresh_db_path, account=acct, container=cont)
+ check_unsharded_db_files(broker)
+ # while UNSHARDED, db_path is still used even though fresh_db_path was
+ # given to the constructor, so we cannot initialize this broker
+ with self.assertRaises(DatabaseAlreadyExists):
+ broker.initialize(ts.internal, 0)
+
+ # Test the SHARDING state, i.e. the period when both db_file and
+ # fresh_db_file exist; in this case db_file should return the
+ # fresh_db_path.
+ def check_sharding_db_files(broker):
+ self.assertEqual(broker.db_file, fresh_db_path)
+ self.assertEqual(broker._db_file, db_path)
+ self.assertTrue(os.path.exists(db_path))
+ self.assertTrue(os.path.exists(fresh_db_path))
+ self.assertEqual([db_path, fresh_db_path], broker.db_files)
+
+ # Use force_db_file to have fresh_db_path created when initializing
+ broker = ContainerBroker(fresh_db_path, account=acct,
+ container=cont, force_db_file=True)
+ self.assertEqual([db_path], broker.db_files)
+ broker.initialize(ts.internal, 0)
+ check_sharding_db_files(broker)
+ broker = ContainerBroker(db_path, account=acct, container=cont)
+ check_sharding_db_files(broker)
+ broker = ContainerBroker(fresh_db_path, account=acct, container=cont)
+ check_sharding_db_files(broker)
+
+ # force_db_file can be used to open db_path specifically
+ forced_broker = ContainerBroker(db_path, account=acct,
+ container=cont, force_db_file=True)
+ self.assertEqual(forced_broker.db_file, db_path)
+ self.assertEqual(forced_broker._db_file, db_path)
+
+ def check_sharded_db_files(broker):
+ self.assertEqual(broker.db_file, fresh_db_path)
+ self.assertEqual(broker._db_file, db_path)
+ self.assertFalse(os.path.exists(db_path))
+ self.assertTrue(os.path.exists(fresh_db_path))
+ self.assertEqual([fresh_db_path], broker.db_files)
+
+ # Test the SHARDED state: only fresh_db_file exists, so db_file should
+ # return the fresh_db_path
+ os.unlink(db_path)
+ broker.reload_db_files()
+ check_sharded_db_files(broker)
+ broker = ContainerBroker(db_path, account=acct, container=cont)
+ check_sharded_db_files(broker)
+
+ @with_tempdir
+ def test_sharding_initiated_and_required(self, tempdir):
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4())
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(Timestamp.now().internal, 0)
+ # no shard ranges
+ self.assertIs(False, broker.sharding_initiated())
+ self.assertIs(False, broker.sharding_required())
+ # only own shard range
+ own_sr = broker.get_own_shard_range()
+ for state in ShardRange.STATES:
+ own_sr.update_state(state, state_timestamp=Timestamp.now())
+ broker.merge_shard_ranges(own_sr)
+ self.assertIs(False, broker.sharding_initiated())
+ self.assertIs(False, broker.sharding_required())
+
+ # shard ranges, still ACTIVE
+ own_sr.update_state(ShardRange.ACTIVE,
+ state_timestamp=Timestamp.now())
+ broker.merge_shard_ranges(own_sr)
+ broker.merge_shard_ranges(ShardRange('.shards_a/cc', Timestamp.now()))
+ self.assertIs(False, broker.sharding_initiated())
+ self.assertIs(False, broker.sharding_required())
+
+ # shard ranges and SHARDING, SHRINKING or SHARDED
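+ # (enable_sharding records the given epoch on the own shard range and
+ # moves it to the SHARDING state; see test_enable_sharding below)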
+ broker.enable_sharding(Timestamp.now())
+ self.assertTrue(broker.set_sharding_state())
+ self.assertIs(True, broker.sharding_initiated())
+ self.assertIs(True, broker.sharding_required())
+
+ epoch = broker.db_epoch
+ own_sr.update_state(ShardRange.SHRINKING,
+ state_timestamp=Timestamp.now())
+ own_sr.epoch = epoch
+ broker.merge_shard_ranges(own_sr)
+ self.assertIs(True, broker.sharding_initiated())
+ self.assertIs(True, broker.sharding_required())
+
+ own_sr.update_state(ShardRange.SHARDED)
+ broker.merge_shard_ranges(own_sr)
+ self.assertTrue(broker.set_sharded_state())
+ self.assertIs(True, broker.sharding_initiated())
+ self.assertIs(False, broker.sharding_required())
+
@with_tempdir
def test_put_object_multiple_encoded_timestamps_using_file(self, tempdir):
# Test ContainerBroker.put_object with differing data, content-type
@@ -629,7 +1467,7 @@ class TestContainerBroker(unittest.TestCase):
self._test_put_object_multiple_encoded_timestamps(broker)
def _test_put_object_multiple_explicit_timestamps(self, broker):
- ts = (Timestamp(t) for t in itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker.initialize(next(ts).internal, 0)
t = [next(ts) for _ in range(11)]
@@ -733,7 +1571,7 @@ class TestContainerBroker(unittest.TestCase):
def test_last_modified_time(self):
# Test container listing reports the most recent of data or metadata
# timestamp as last-modified time
- ts = (Timestamp(t) for t in itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker = ContainerBroker(':memory:', account='a', container='c')
broker.initialize(next(ts).internal, 0)
@@ -786,18 +1624,17 @@ class TestContainerBroker(unittest.TestCase):
@patch_policies
def test_put_misplaced_object_does_not_effect_container_stats(self):
policy = random.choice(list(POLICIES))
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker = ContainerBroker(':memory:',
account='a', container='c')
- broker.initialize(next(ts), policy.idx)
+ broker.initialize(next(ts).internal, policy.idx)
# migration tests may not honor policy on initialize
if isinstance(self, ContainerBrokerMigrationMixin):
real_storage_policy_index = \
broker.get_info()['storage_policy_index']
policy = [p for p in POLICIES
if p.idx == real_storage_policy_index][0]
- broker.put_object('correct_o', next(ts), 123, 'text/plain',
+ broker.put_object('correct_o', next(ts).internal, 123, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=policy.idx)
info = broker.get_info()
@@ -805,7 +1642,7 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(123, info['bytes_used'])
other_policy = random.choice([p for p in POLICIES
if p is not policy])
- broker.put_object('wrong_o', next(ts), 123, 'text/plain',
+ broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=other_policy.idx)
self.assertEqual(1, info['object_count'])
@@ -814,23 +1651,22 @@ class TestContainerBroker(unittest.TestCase):
@patch_policies
def test_has_multiple_policies(self):
policy = random.choice(list(POLICIES))
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker = ContainerBroker(':memory:',
account='a', container='c')
- broker.initialize(next(ts), policy.idx)
+ broker.initialize(next(ts).internal, policy.idx)
# migration tests may not honor policy on initialize
if isinstance(self, ContainerBrokerMigrationMixin):
real_storage_policy_index = \
broker.get_info()['storage_policy_index']
policy = [p for p in POLICIES
if p.idx == real_storage_policy_index][0]
- broker.put_object('correct_o', next(ts), 123, 'text/plain',
+ broker.put_object('correct_o', next(ts).internal, 123, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=policy.idx)
self.assertFalse(broker.has_multiple_policies())
other_policy = [p for p in POLICIES if p is not policy][0]
- broker.put_object('wrong_o', next(ts), 123, 'text/plain',
+ broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=other_policy.idx)
self.assertTrue(broker.has_multiple_policies())
@@ -838,11 +1674,10 @@ class TestContainerBroker(unittest.TestCase):
@patch_policies
def test_get_policy_info(self):
policy = random.choice(list(POLICIES))
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker = ContainerBroker(':memory:',
account='a', container='c')
- broker.initialize(next(ts), policy.idx)
+ broker.initialize(next(ts).internal, policy.idx)
# migration tests may not honor policy on initialize
if isinstance(self, ContainerBrokerMigrationMixin):
real_storage_policy_index = \
@@ -854,7 +1689,7 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(policy_stats, expected)
# add an object
- broker.put_object('correct_o', next(ts), 123, 'text/plain',
+ broker.put_object('correct_o', next(ts).internal, 123, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=policy.idx)
policy_stats = broker.get_policy_stats()
@@ -864,7 +1699,7 @@ class TestContainerBroker(unittest.TestCase):
# add a misplaced object
other_policy = random.choice([p for p in POLICIES
if p is not policy])
- broker.put_object('wrong_o', next(ts), 123, 'text/plain',
+ broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=other_policy.idx)
policy_stats = broker.get_policy_stats()
@@ -876,15 +1711,14 @@ class TestContainerBroker(unittest.TestCase):
@patch_policies
def test_policy_stat_tracking(self):
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker = ContainerBroker(':memory:',
account='a', container='c')
# Note: in subclasses of this TestCase that inherit the
# ContainerBrokerMigrationMixin, passing POLICIES.default.idx here has
# no effect and broker.get_policy_stats() returns a dict with a single
# entry mapping policy index 0 to the container stats
- broker.initialize(next(ts), POLICIES.default.idx)
+ broker.initialize(next(ts).internal, POLICIES.default.idx)
stats = defaultdict(dict)
def assert_empty_default_policy_stats(policy_stats):
@@ -904,7 +1738,7 @@ class TestContainerBroker(unittest.TestCase):
policy_index = random.randint(0, iters * 0.1)
name = 'object-%s' % random.randint(0, iters * 0.1)
size = random.randint(0, iters)
- broker.put_object(name, next(ts), size, 'text/plain',
+ broker.put_object(name, next(ts).internal, size, 'text/plain',
'5af83e3196bf99f440f31f2e1a6c9afe',
storage_policy_index=policy_index)
# track the size of the latest timestamp put for each object
@@ -973,7 +1807,8 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(info['delete_timestamp'], '0')
if self.__class__ in (TestContainerBrokerBeforeMetadata,
TestContainerBrokerBeforeXSync,
- TestContainerBrokerBeforeSPI):
+ TestContainerBrokerBeforeSPI,
+ TestContainerBrokerBeforeShardRanges):
self.assertEqual(info['status_changed_at'], '0')
else:
self.assertEqual(info['status_changed_at'],
@@ -1019,6 +1854,84 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(info['x_container_sync_point1'], -1)
self.assertEqual(info['x_container_sync_point2'], -1)
+ @with_tempdir
+ def test_get_info_sharding_states(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'part', 'suffix', 'hash', 'hash.db')
+ broker = ContainerBroker(
+ db_path, account='myaccount', container='mycontainer')
+ broker.initialize(next(ts_iter).internal, 0)
+ broker.put_object('o1', next(ts_iter).internal, 123, 'text/plain',
+ 'fake etag')
+ sr = ShardRange('.shards_a/c', next(ts_iter))
+ broker.merge_shard_ranges(sr)
+
+ def check_info(expected):
+ errors = []
+ for k, v in expected.items():
+ if info.get(k) != v:
+ errors.append((k, v, info.get(k)))
+ if errors:
+ self.fail('Mismatches: %s' % ', '.join(
+ ['%s should be %s but got %s' % error
+ for error in errors]))
+
+ # unsharded
+ with mock.patch.object(
+ broker, 'get_shard_usage') as mock_get_shard_usage:
+ info = broker.get_info()
+ mock_get_shard_usage.assert_not_called()
+ check_info({'account': 'myaccount',
+ 'container': 'mycontainer',
+ 'object_count': 1,
+ 'bytes_used': 123,
+ 'db_state': 'unsharded'})
+
+ # sharding
+ epoch = next(ts_iter)
+ broker.enable_sharding(epoch)
+ self.assertTrue(broker.set_sharding_state())
+ broker.put_object('o2', next(ts_iter).internal, 1, 'text/plain',
+ 'fake etag')
+ broker.put_object('o3', next(ts_iter).internal, 320, 'text/plain',
+ 'fake etag')
+ with mock.patch.object(
+ broker, 'get_shard_usage') as mock_get_shard_usage:
+ info = broker.get_info()
+ mock_get_shard_usage.assert_not_called()
+ check_info({'account': 'myaccount',
+ 'container': 'mycontainer',
+ 'object_count': 1,
+ 'bytes_used': 123,
+ 'db_state': 'sharding'})
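+ # note: o2 and o3 were put after set_sharding_state, and the stats
+ # reported above still only reflect the original db's single object (o1)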
+
+ # sharded
+ self.assertTrue(broker.set_sharded_state())
+ shard_stats = {'object_count': 1001, 'bytes_used': 3003}
+ with mock.patch.object(
+ broker, 'get_shard_usage') as mock_get_shard_usage:
+ mock_get_shard_usage.return_value = shard_stats
+ info = broker.get_info()
+ mock_get_shard_usage.assert_called_once_with()
+ check_info({'account': 'myaccount',
+ 'container': 'mycontainer',
+ 'object_count': 1001,
+ 'bytes_used': 3003,
+ 'db_state': 'sharded'})
+
+ # collapsed
+ sr.set_deleted(next(ts_iter))
+ broker.merge_shard_ranges(sr)
+ with mock.patch.object(
+ broker, 'get_shard_usage') as mock_get_shard_usage:
+ info = broker.get_info()
+ mock_get_shard_usage.assert_not_called()
+ check_info({'account': 'myaccount',
+ 'container': 'mycontainer',
+ 'object_count': 2,
+ 'bytes_used': 321,
+ 'db_state': 'collapsed'})
+
def test_set_x_syncs(self):
broker = ContainerBroker(':memory:', account='test1',
container='test2')
@@ -1100,6 +2013,174 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(info['reported_object_count'], 2)
self.assertEqual(info['reported_bytes_used'], 1123)
+ @with_tempdir
+ def test_remove_objects(self, tempdir):
+ objects = (('undeleted', Timestamp.now().internal, 0, 'text/plain',
+ EMPTY_ETAG, 0, 0),
+ ('other_policy', Timestamp.now().internal, 0, 'text/plain',
+ EMPTY_ETAG, 0, 1),
+ ('deleted', Timestamp.now().internal, 0, 'text/plain',
+ EMPTY_ETAG, 1, 0))
+ object_names = [o[0] for o in objects]
+
+ def get_rows(broker):
+ with broker.get() as conn:
+ cursor = conn.execute("SELECT * FROM object")
+ return [r[1] for r in cursor]
+
+ def do_setup():
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4())
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(Timestamp.now().internal, 0)
+ for obj in objects:
+ # ensure row order matches put order
+ broker.put_object(*obj)
+ broker._commit_puts()
+
+ self.assertEqual(3, broker.get_max_row()) # sanity check
+ self.assertEqual(object_names, get_rows(broker)) # sanity check
+ return broker
+
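+ # The calls below exercise remove_objects(marker, end_marker, max_row):
+ # as the assertions show, marker appears to act as an exclusive lower
+ # bound and end_marker as an upper bound ('' meaning unbounded), and when
+ # max_row is given only rows with id <= max_row are removed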
+ broker = do_setup()
+ broker.remove_objects('', '')
+ self.assertFalse(get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('deleted', '')
+ self.assertEqual([object_names[2]], get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', 'deleted', max_row=2)
+ self.assertEqual(object_names, get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('deleted', 'un')
+ self.assertEqual([object_names[0], object_names[2]], get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', '', max_row=-1)
+ self.assertEqual(object_names, get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', '', max_row=0)
+ self.assertEqual(object_names, get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', '', max_row=1)
+ self.assertEqual(object_names[1:], get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', '', max_row=2)
+ self.assertEqual(object_names[2:], get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', '', max_row=3)
+ self.assertFalse(get_rows(broker))
+
+ broker = do_setup()
+ broker.remove_objects('', '', max_row=99)
+ self.assertFalse(get_rows(broker))
+
+ def test_get_objects(self):
+ broker = ContainerBroker(':memory:', account='a', container='c')
+ broker.initialize(Timestamp('1').internal, 0)
+ ts_iter = make_timestamp_iter()
+ objects_0 = [{'name': 'obj_0_%d' % i,
+ 'created_at': next(ts_iter).normal,
+ 'content_type': 'text/plain',
+ 'etag': 'etag_%d' % i,
+ 'size': 1024 * i,
+ 'deleted': i % 2,
+ 'storage_policy_index': 0
+ } for i in range(1, 8)]
+ objects_1 = [{'name': 'obj_1_%d' % i,
+ 'created_at': next(ts_iter).normal,
+ 'content_type': 'text/plain',
+ 'etag': 'etag_%d' % i,
+ 'size': 1024 * i,
+ 'deleted': i % 2,
+ 'storage_policy_index': 1
+ } for i in range(1, 8)]
+ # merge_items mutates items
+ broker.merge_items([dict(obj) for obj in objects_0 + objects_1])
+
+ actual = broker.get_objects()
+ self.assertEqual(objects_0 + objects_1, actual)
+
+ with mock.patch('swift.container.backend.CONTAINER_LISTING_LIMIT', 2):
+ actual = broker.get_objects()
+ self.assertEqual(objects_0[:2], actual)
+
+ with mock.patch('swift.container.backend.CONTAINER_LISTING_LIMIT', 2):
+ actual = broker.get_objects(limit=9)
+ self.assertEqual(objects_0 + objects_1[:2], actual)
+
+ actual = broker.get_objects(marker=objects_0[2]['name'])
+ self.assertEqual(objects_0[3:] + objects_1, actual)
+
+ actual = broker.get_objects(end_marker=objects_0[2]['name'])
+ self.assertEqual(objects_0[:2], actual)
+
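+ # include_deleted=True yields only deleted rows, False only undeleted
+ # rows, and None disables the filter entirely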
+ actual = broker.get_objects(include_deleted=True)
+ self.assertEqual(objects_0[::2] + objects_1[::2], actual)
+
+ actual = broker.get_objects(include_deleted=False)
+ self.assertEqual(objects_0[1::2] + objects_1[1::2], actual)
+
+ actual = broker.get_objects(include_deleted=None)
+ self.assertEqual(objects_0 + objects_1, actual)
+
+ def test_get_objects_since_row(self):
+ ts_iter = make_timestamp_iter()
+ broker = ContainerBroker(':memory:', account='a', container='c')
+ broker.initialize(Timestamp('1').internal, 0)
+ obj_names = ['obj%03d' % i for i in range(20)]
+ timestamps = [next(ts_iter) for o in obj_names]
+ for name, timestamp in zip(obj_names, timestamps):
+ broker.put_object(name, timestamp.internal,
+ 0, 'text/plain', EMPTY_ETAG)
+ broker._commit_puts() # ensure predictable row order
+ timestamps = [next(ts_iter) for o in obj_names[10:]]
+ for name, timestamp in zip(obj_names[10:], timestamps):
+ broker.put_object(name, timestamp.internal,
+ 0, 'text/plain', EMPTY_ETAG, deleted=1)
+ broker._commit_puts() # ensure predictable row order
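+ # rows 1-20 are the original puts; rows 21-30 are the delete updates for
+ # obj010-obj019, so those names now live at higher row ids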
+
+ # sanity check
+ self.assertEqual(30, broker.get_max_row())
+ actual = broker.get_objects()
+ self.assertEqual(obj_names, [o['name'] for o in actual])
+
+ # all rows included
+ actual = broker.get_objects(since_row=None)
+ self.assertEqual(obj_names, [o['name'] for o in actual])
+
+ actual = broker.get_objects(since_row=-1)
+ self.assertEqual(obj_names, [o['name'] for o in actual])
+
+ # selected rows
+ for since_row in range(10):
+ actual = broker.get_objects(since_row=since_row)
+ with annotate_failure(since_row):
+ self.assertEqual(obj_names[since_row:],
+ [o['name'] for o in actual])
+
+ for since_row in range(10, 20):
+ actual = broker.get_objects(since_row=since_row)
+ with annotate_failure(since_row):
+ self.assertEqual(obj_names[10:],
+ [o['name'] for o in actual])
+
+ for since_row in range(20, len(obj_names) + 1):
+ actual = broker.get_objects(since_row=since_row)
+ with annotate_failure(since_row):
+ self.assertEqual(obj_names[since_row - 10:],
+ [o['name'] for o in actual])
+
+ self.assertFalse(broker.get_objects(end_marker=obj_names[5],
+ since_row=5))
+
def test_list_objects_iter(self):
# Test ContainerBroker.list_objects_iter
broker = ContainerBroker(':memory:', account='a', container='c')
@@ -1832,6 +2913,21 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(['a', 'b', 'c'],
sorted([rec['name'] for rec in items]))
+ @with_tempdir
+ def test_merge_items_is_green(self, tempdir):
+ ts = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts).internal, 1)
+
+ broker.put_object('b', next(ts).internal, 0, 'text/plain',
+ EMPTY_ETAG)
+
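+ # get_info has to merge the pending put; that merge is expected to be
+ # dispatched via eventlet's tpool so the greenthread isn't blocked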
+ with mock.patch('swift.container.backend.tpool') as mock_tpool:
+ broker.get_info()
+ mock_tpool.execute.assert_called_once()
+
def test_merge_items_overwrite_unicode(self):
# test DatabaseBroker.merge_items
snowman = u'\N{SNOWMAN}'.encode('utf-8')
@@ -1930,12 +3026,11 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(rec['content_type'], 'text/plain')
def test_set_storage_policy_index(self):
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
broker = ContainerBroker(':memory:', account='test_account',
container='test_container')
timestamp = next(ts)
- broker.initialize(timestamp, 0)
+ broker.initialize(timestamp.internal, 0)
info = broker.get_info()
self.assertEqual(0, info['storage_policy_index']) # sanity check
@@ -1943,42 +3038,44 @@ class TestContainerBroker(unittest.TestCase):
self.assertEqual(0, info['bytes_used'])
if self.__class__ in (TestContainerBrokerBeforeMetadata,
TestContainerBrokerBeforeXSync,
- TestContainerBrokerBeforeSPI):
+ TestContainerBrokerBeforeSPI,
+ TestContainerBrokerBeforeShardRanges):
self.assertEqual(info['status_changed_at'], '0')
else:
- self.assertEqual(timestamp, info['status_changed_at'])
+ self.assertEqual(timestamp.internal, info['status_changed_at'])
expected = {0: {'object_count': 0, 'bytes_used': 0}}
self.assertEqual(expected, broker.get_policy_stats())
timestamp = next(ts)
- broker.set_storage_policy_index(111, timestamp)
+ broker.set_storage_policy_index(111, timestamp.internal)
self.assertEqual(broker.storage_policy_index, 111)
info = broker.get_info()
self.assertEqual(111, info['storage_policy_index'])
self.assertEqual(0, info['object_count'])
self.assertEqual(0, info['bytes_used'])
- self.assertEqual(timestamp, info['status_changed_at'])
+ self.assertEqual(timestamp.internal, info['status_changed_at'])
expected[111] = {'object_count': 0, 'bytes_used': 0}
self.assertEqual(expected, broker.get_policy_stats())
timestamp = next(ts)
- broker.set_storage_policy_index(222, timestamp)
+ broker.set_storage_policy_index(222, timestamp.internal)
self.assertEqual(broker.storage_policy_index, 222)
info = broker.get_info()
self.assertEqual(222, info['storage_policy_index'])
self.assertEqual(0, info['object_count'])
self.assertEqual(0, info['bytes_used'])
- self.assertEqual(timestamp, info['status_changed_at'])
+ self.assertEqual(timestamp.internal, info['status_changed_at'])
expected[222] = {'object_count': 0, 'bytes_used': 0}
self.assertEqual(expected, broker.get_policy_stats())
old_timestamp, timestamp = timestamp, next(ts)
- broker.set_storage_policy_index(222, timestamp) # it's idempotent
+ # setting again is idempotent
+ broker.set_storage_policy_index(222, timestamp.internal)
info = broker.get_info()
self.assertEqual(222, info['storage_policy_index'])
self.assertEqual(0, info['object_count'])
self.assertEqual(0, info['bytes_used'])
- self.assertEqual(old_timestamp, info['status_changed_at'])
+ self.assertEqual(old_timestamp.internal, info['status_changed_at'])
self.assertEqual(expected, broker.get_policy_stats())
def test_set_storage_policy_index_empty(self):
@@ -2004,19 +3101,18 @@ class TestContainerBroker(unittest.TestCase):
@with_tempdir
def test_legacy_pending_files(self, tempdir):
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
db_path = os.path.join(tempdir, 'container.db')
# first init an acct DB without the policy_stat table present
broker = ContainerBroker(db_path, account='a', container='c')
- broker.initialize(next(ts), 1)
+ broker.initialize(next(ts).internal, 1)
# manually make some pending entries lacking storage_policy_index
with open(broker.pending_file, 'a+b') as fp:
for i in range(10):
name, timestamp, size, content_type, etag, deleted = (
- 'o%s' % i, next(ts), 0, 'c', 'e', 0)
+ 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0)
fp.write(':')
fp.write(pickle.dumps(
(name, timestamp, size, content_type, etag, deleted),
@@ -2033,7 +3129,7 @@ class TestContainerBroker(unittest.TestCase):
else:
size = 2
storage_policy_index = 1
- broker.put_object(name, next(ts), size, 'c', 'e', 0,
+ broker.put_object(name, next(ts).internal, size, 'c', 'e', 0,
storage_policy_index=storage_policy_index)
broker._commit_puts_stale_ok()
@@ -2049,8 +3145,7 @@ class TestContainerBroker(unittest.TestCase):
@with_tempdir
def test_get_info_no_stale_reads(self, tempdir):
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
db_path = os.path.join(tempdir, 'container.db')
def mock_commit_puts():
@@ -2058,13 +3153,13 @@ class TestContainerBroker(unittest.TestCase):
broker = ContainerBroker(db_path, account='a', container='c',
stale_reads_ok=False)
- broker.initialize(next(ts), 1)
+ broker.initialize(next(ts).internal, 1)
# manually make some pending entries
with open(broker.pending_file, 'a+b') as fp:
for i in range(10):
name, timestamp, size, content_type, etag, deleted = (
- 'o%s' % i, next(ts), 0, 'c', 'e', 0)
+ 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0)
fp.write(':')
fp.write(pickle.dumps(
(name, timestamp, size, content_type, etag, deleted),
@@ -2079,8 +3174,7 @@ class TestContainerBroker(unittest.TestCase):
@with_tempdir
def test_get_info_stale_read_ok(self, tempdir):
- ts = (Timestamp(t).internal for t in
- itertools.count(int(time())))
+ ts = make_timestamp_iter()
db_path = os.path.join(tempdir, 'container.db')
def mock_commit_puts():
@@ -2088,13 +3182,13 @@ class TestContainerBroker(unittest.TestCase):
broker = ContainerBroker(db_path, account='a', container='c',
stale_reads_ok=True)
- broker.initialize(next(ts), 1)
+ broker.initialize(next(ts).internal, 1)
# manually make some pending entries
with open(broker.pending_file, 'a+b') as fp:
for i in range(10):
name, timestamp, size, content_type, etag, deleted = (
- 'o%s' % i, next(ts), 0, 'c', 'e', 0)
+ 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0)
fp.write(':')
fp.write(pickle.dumps(
(name, timestamp, size, content_type, etag, deleted),
@@ -2104,6 +3198,1257 @@ class TestContainerBroker(unittest.TestCase):
broker._commit_puts = mock_commit_puts
broker.get_info()
+ @with_tempdir
+ def test_create_broker(self, tempdir):
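+ # create_broker should build the standard hashed path under 'containers'
+ # and initialize the db file there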
+ broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c')
+ hsh = hash_path('a', 'c')
+ expected_path = os.path.join(
+ tempdir, 'containers', '0', hsh[-3:], hsh, hsh + '.db')
+ self.assertEqual(expected_path, broker.db_file)
+ self.assertTrue(os.path.isfile(expected_path))
+
+ ts = Timestamp.now()
+ broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c1',
+ put_timestamp=ts.internal)
+ hsh = hash_path('a', 'c1')
+ expected_path = os.path.join(
+ tempdir, 'containers', '0', hsh[-3:], hsh, hsh + '.db')
+ self.assertEqual(expected_path, broker.db_file)
+ self.assertTrue(os.path.isfile(expected_path))
+ self.assertEqual(ts.internal, broker.get_info()['put_timestamp'])
+ self.assertEqual(0, broker.get_info()['storage_policy_index'])
+
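+ # passing an epoch should yield a fresh-style db file named
+ # <hash>_<epoch>.db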
+ epoch = Timestamp.now()
+ broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c3',
+ epoch=epoch)
+ hsh = hash_path('a', 'c3')
+ expected_path = os.path.join(
+ tempdir, 'containers', '0', hsh[-3:],
+ hsh, '%s_%s.db' % (hsh, epoch.internal))
+ self.assertEqual(expected_path, broker.db_file)
+
+ @with_tempdir
+ def test_pending_file_name(self, tempdir):
+ # pending file should have same name for sharded or unsharded db
+ expected_pending_path = os.path.join(tempdir, 'container.db.pending')
+
+ db_path = os.path.join(tempdir, 'container.db')
+ fresh_db_path = os.path.join(tempdir, 'container_epoch.db')
+
+ def do_test(given_db_file, expected_db_file):
+ broker = ContainerBroker(given_db_file, account='a', container='c')
+ self.assertEqual(expected_pending_path, broker.pending_file)
+ self.assertEqual(expected_db_file, broker.db_file)
+
+ # no files exist
+ do_test(db_path, db_path)
+ do_test(fresh_db_path, fresh_db_path)
+
+ # only container.db exists - unsharded
+ with open(db_path, 'wb'):
+ pass
+ do_test(db_path, db_path)
+ do_test(fresh_db_path, db_path)
+
+ # container.db and container_epoch.db exist - sharding
+ with open(fresh_db_path, 'wb'):
+ pass
+ do_test(db_path, fresh_db_path)
+ do_test(fresh_db_path, fresh_db_path)
+
+ # only container_epoch.db exists - sharded
+ os.unlink(db_path)
+ do_test(db_path, fresh_db_path)
+ do_test(fresh_db_path, fresh_db_path)
+
+ @with_tempdir
+ def test_sharding_sysmeta(self, tempdir):
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(
+ db_path, account='myaccount', container='mycontainer')
+ broker.initialize(Timestamp.now().internal)
+
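+ # set_sharding_sysmeta/get_sharding_sysmeta should round-trip values
+ # stored under the X-Container-Sysmeta-Shard-<key> metadata prefix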
+ expected = 'aaa/ccc'
+ with mock_timestamp_now() as now:
+ broker.set_sharding_sysmeta('Root', expected)
+ actual = broker.metadata
+ self.assertEqual([expected, now.internal],
+ actual.get('X-Container-Sysmeta-Shard-Root'))
+ self.assertEqual(expected, broker.get_sharding_sysmeta('Root'))
+
+ expected = {'key': 'value'}
+ with mock_timestamp_now() as now:
+ broker.set_sharding_sysmeta('test', expected)
+ actual = broker.metadata
+ self.assertEqual([expected, now.internal],
+ actual.get('X-Container-Sysmeta-Shard-test'))
+ self.assertEqual(expected, broker.get_sharding_sysmeta('test'))
+
+ @with_tempdir
+ def test_path(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(
+ db_path, account='myaccount', container='mycontainer')
+ broker.initialize(next(ts_iter).internal, 1)
+ # make sure we can cope with uninitialized account and container
+ broker.account = broker.container = None
+ self.assertEqual('myaccount/mycontainer', broker.path)
+
+ @with_tempdir
+ def test_root_account_container_path(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(
+ db_path, account='root_a', container='root_c')
+ broker.initialize(next(ts_iter).internal, 1)
+ # make sure we can cope with uninitialized account and container
+ broker.account = broker.container = None
+
+ self.assertEqual('root_a', broker.root_account)
+ self.assertEqual('root_c', broker.root_container)
+ self.assertEqual('root_a/root_c', broker.root_path)
+ self.assertTrue(broker.is_root_container())
+ self.assertEqual('root_a', broker.account) # sanity check
+ self.assertEqual('root_c', broker.container) # sanity check
+
+ # we don't expect root containers to have this sysmeta set, but if it is
+ # set the broker should still behave like a root container
+ metadata = {
+ 'X-Container-Sysmeta-Shard-Root':
+ ('root_a/root_c', next(ts_iter).internal)}
+ broker = ContainerBroker(
+ db_path, account='root_a', container='root_c')
+ broker.update_metadata(metadata)
+ broker.account = broker.container = None
+ self.assertEqual('root_a', broker.root_account)
+ self.assertEqual('root_c', broker.root_container)
+ self.assertEqual('root_a/root_c', broker.root_path)
+ self.assertTrue(broker.is_root_container())
+
+ # if root is marked deleted, it still considers itself to be a root
+ broker.delete_db(next(ts_iter).internal)
+ self.assertEqual('root_a', broker.root_account)
+ self.assertEqual('root_c', broker.root_container)
+ self.assertEqual('root_a/root_c', broker.root_path)
+ self.assertTrue(broker.is_root_container())
+ # check the values are not just being cached
+ broker = ContainerBroker(db_path)
+ self.assertEqual('root_a', broker.root_account)
+ self.assertEqual('root_c', broker.root_container)
+ self.assertEqual('root_a/root_c', broker.root_path)
+ self.assertTrue(broker.is_root_container())
+
+ # check a shard container
+ db_path = os.path.join(tempdir, 'shard_container.db')
+ broker = ContainerBroker(
+ db_path, account='.shards_root_a', container='c_shard')
+ broker.initialize(next(ts_iter).internal, 1)
+ # now the metadata is significant...
+ metadata = {
+ 'X-Container-Sysmeta-Shard-Root':
+ ('root_a/root_c', next(ts_iter).internal)}
+ broker.update_metadata(metadata)
+ broker.account = broker.container = None
+ broker._root_account = broker._root_container = None
+
+ self.assertEqual('root_a', broker.root_account)
+ self.assertEqual('root_c', broker.root_container)
+ self.assertEqual('root_a/root_c', broker.root_path)
+ self.assertFalse(broker.is_root_container())
+
+ # check validation
+ def check_validation(root_value):
+ metadata = {
+ 'X-Container-Sysmeta-Shard-Root':
+ (root_value, next(ts_iter).internal)}
+ broker.update_metadata(metadata)
+ broker.account = broker.container = None
+ broker._root_account = broker._root_container = None
+ with self.assertRaises(ValueError) as cm:
+ broker.root_account
+ self.assertIn('Expected X-Container-Sysmeta-Shard-Root',
+ str(cm.exception))
+ with self.assertRaises(ValueError):
+ broker.root_container
+
+ check_validation('root_a')
+ check_validation('/root_a')
+ check_validation('/root_a/root_c')
+ check_validation('/root_a/root_c/blah')
+ check_validation('/')
+
+ def test_resolve_shard_range_states(self):
+ self.assertIsNone(ContainerBroker.resolve_shard_range_states(None))
+ self.assertIsNone(ContainerBroker.resolve_shard_range_states([]))
+
+ for state_num, state_name in ShardRange.STATES.items():
+ self.assertEqual({state_num},
+ ContainerBroker.resolve_shard_range_states(
+ [state_name]))
+ self.assertEqual({state_num},
+ ContainerBroker.resolve_shard_range_states(
+ [state_num]))
+
+ self.assertEqual(set(ShardRange.STATES),
+ ContainerBroker.resolve_shard_range_states(
+ ShardRange.STATES_BY_NAME))
+
+ self.assertEqual(
+ set(ShardRange.STATES),
+ ContainerBroker.resolve_shard_range_states(ShardRange.STATES))
+
+ # check aliases
+ self.assertEqual(
+ {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING},
+ ContainerBroker.resolve_shard_range_states(['listing']))
+
+ self.assertEqual(
+ {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING},
+ ContainerBroker.resolve_shard_range_states(['listing', 'active']))
+
+ self.assertEqual(
+ {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING, ShardRange.CREATED},
+ ContainerBroker.resolve_shard_range_states(['listing', 'created']))
+
+ self.assertEqual(
+ {ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE,
+ ShardRange.SHARDING},
+ ContainerBroker.resolve_shard_range_states(['updating']))
+
+ self.assertEqual(
+ {ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE,
+ ShardRange.SHARDING, ShardRange.SHRINKING},
+ ContainerBroker.resolve_shard_range_states(
+ ['updating', 'listing']))
+
+ def check_bad_value(value):
+ with self.assertRaises(ValueError) as cm:
+ ContainerBroker.resolve_shard_range_states(value)
+ self.assertIn('Invalid state', str(cm.exception))
+
+ check_bad_value(['bad_state', 'active'])
+ check_bad_value([''])
+ check_bad_value('active')
+
+ @with_tempdir
+ def test_get_shard_ranges(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+
+ # no rows
+ self.assertFalse(broker.get_shard_ranges())
+ # check that a default own shard range is not generated
+ self.assertFalse(broker.get_shard_ranges(include_own=True))
+
+ # merge row for own shard range
+ own_shard_range = ShardRange(broker.path, next(ts_iter), 'l', 'u',
+ state=ShardRange.SHARDING)
+ broker.merge_shard_ranges([own_shard_range])
+ self.assertFalse(broker.get_shard_ranges())
+ self.assertFalse(broker.get_shard_ranges(include_own=False))
+
+ actual = broker.get_shard_ranges(include_own=True)
+ self.assertEqual([dict(sr) for sr in [own_shard_range]],
+ [dict(sr) for sr in actual])
+
+ # merge rows for other shard ranges
+ shard_ranges = [
+ ShardRange('.a/c0', next(ts_iter), 'a', 'c'),
+ ShardRange('.a/c1', next(ts_iter), 'c', 'd'),
+ ShardRange('.a/c2', next(ts_iter), 'd', 'f',
+ state=ShardRange.ACTIVE),
+ ShardRange('.a/c3', next(ts_iter), 'e', 'f', deleted=1,
+ state=ShardRange.SHARDED,),
+ ShardRange('.a/c4', next(ts_iter), 'f', 'h',
+ state=ShardRange.CREATED),
+ ShardRange('.a/c5', next(ts_iter), 'h', 'j', deleted=1)
+ ]
+ broker.merge_shard_ranges(shard_ranges)
+ actual = broker.get_shard_ranges()
+ undeleted = shard_ranges[:3] + shard_ranges[4:5]
+ self.assertEqual([dict(sr) for sr in undeleted],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(include_deleted=True)
+ self.assertEqual([dict(sr) for sr in shard_ranges],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(reverse=True)
+ self.assertEqual([dict(sr) for sr in reversed(undeleted)],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(marker='c', end_marker='e')
+ self.assertEqual([dict(sr) for sr in shard_ranges[1:3]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(marker='c', end_marker='e',
+ states=ShardRange.ACTIVE)
+ self.assertEqual([dict(sr) for sr in shard_ranges[2:3]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(marker='e', end_marker='e')
+ self.assertFalse([dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(includes='f')
+ self.assertEqual([dict(sr) for sr in shard_ranges[2:3]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(includes='i')
+ self.assertFalse(actual)
+
+ actual = broker.get_shard_ranges(
+ states=[ShardRange.CREATED, ShardRange.ACTIVE])
+ self.assertEqual(
+ [dict(sr) for sr in [shard_ranges[2], shard_ranges[4]]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(exclude_states=ShardRange.CREATED)
+ self.assertEqual([dict(sr) for sr in shard_ranges[:3]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(
+ exclude_states=[ShardRange.CREATED, ShardRange.ACTIVE])
+ self.assertEqual([dict(sr) for sr in shard_ranges[:2]],
+ [dict(sr) for sr in actual])
+
+ # exclude_states takes precedence
+ actual = broker.get_shard_ranges(
+ states=ShardRange.CREATED, exclude_states=ShardRange.CREATED)
+ self.assertEqual([dict(sr) for sr in shard_ranges[:3]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(states=[ShardRange.CREATED],
+ exclude_states=[ShardRange.ACTIVE])
+ self.assertEqual([dict(sr) for sr in shard_ranges[4:5]],
+ [dict(sr) for sr in actual])
+
+ # get everything
+ actual = broker.get_shard_ranges(include_own=True)
+ self.assertEqual([dict(sr) for sr in undeleted + [own_shard_range]],
+ [dict(sr) for sr in actual])
+
+ # get just own range
+ actual = broker.get_shard_ranges(include_own=True, exclude_others=True)
+ self.assertEqual([dict(sr) for sr in [own_shard_range]],
+ [dict(sr) for sr in actual])
+
+ # exclude_states overrides include_own
+ actual = broker.get_shard_ranges(include_own=True,
+ exclude_states=ShardRange.SHARDING,
+ exclude_others=True)
+ self.assertFalse(actual)
+
+ # if you ask for nothing you'll get nothing
+ actual = broker.get_shard_ranges(
+ include_own=False, exclude_others=True)
+ self.assertFalse(actual)
+
+ @with_tempdir
+ def test_get_shard_ranges_with_sharding_overlaps(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+ shard_ranges = [
+ ShardRange('.shards_a/c0', next(ts_iter), 'a', 'd',
+ state=ShardRange.ACTIVE),
+ ShardRange('.shards_a/c1_0', next(ts_iter), 'd', 'g',
+ state=ShardRange.CLEAVED),
+ ShardRange('.shards_a/c1_1', next(ts_iter), 'g', 'j',
+ state=ShardRange.CLEAVED),
+ ShardRange('.shards_a/c1_2', next(ts_iter), 'j', 'm',
+ state=ShardRange.CREATED),
+ ShardRange('.shards_a/c1', next(ts_iter), 'd', 'm',
+ state=ShardRange.SHARDING),
+ ShardRange('.shards_a/c2', next(ts_iter), 'm', '',
+ state=ShardRange.ACTIVE),
+ ]
+ broker.merge_shard_ranges(
+ random.sample(shard_ranges, len(shard_ranges)))
+ actual = broker.get_shard_ranges()
+ self.assertEqual([dict(sr) for sr in shard_ranges],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(states=SHARD_LISTING_STATES)
+ self.assertEqual(
+ [dict(sr) for sr in shard_ranges[:3] + shard_ranges[4:]],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES,
+ includes='e')
+ self.assertEqual([shard_ranges[1]], actual)
+ actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES,
+ includes='j')
+ self.assertEqual([shard_ranges[2]], actual)
+ actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES,
+ includes='k')
+ self.assertEqual([shard_ranges[3]], actual)
+
+ @with_tempdir
+ def test_get_shard_ranges_with_shrinking_overlaps(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+ shard_ranges = [
+ ShardRange('.shards_a/c0', next(ts_iter), 'a', 'k',
+ state=ShardRange.ACTIVE),
+ ShardRange('.shards_a/c1', next(ts_iter), 'k', 'm',
+ state=ShardRange.SHRINKING),
+ ShardRange('.shards_a/c2', next(ts_iter), 'k', 't',
+ state=ShardRange.ACTIVE),
+ ShardRange('.shards_a/c3', next(ts_iter), 't', '',
+ state=ShardRange.ACTIVE),
+ ]
+ broker.merge_shard_ranges(
+ random.sample(shard_ranges, len(shard_ranges)))
+ actual = broker.get_shard_ranges()
+ self.assertEqual([dict(sr) for sr in shard_ranges],
+ [dict(sr) for sr in actual])
+
+ actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES,
+ includes='l')
+ self.assertEqual([shard_ranges[2]], actual)
+
+ @with_tempdir
+ def test_get_own_shard_range(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(
+ db_path, account='.shards_a', container='shard_c')
+ broker.initialize(next(ts_iter).internal, 0)
+
+ # no row for own shard range - expect entire namespace default
+ now = Timestamp.now()
+ expected = ShardRange(broker.path, now, '', '', 0, 0, now,
+ state=ShardRange.ACTIVE)
+ with mock.patch('swift.container.backend.Timestamp.now',
+ return_value=now):
+ actual = broker.get_own_shard_range()
+ self.assertEqual(dict(expected), dict(actual))
+
+ actual = broker.get_own_shard_range(no_default=True)
+ self.assertIsNone(actual)
+
+ # row for own shard range and others
+ ts_1 = next(ts_iter)
+ own_sr = ShardRange(broker.path, ts_1, 'l', 'u')
+ broker.merge_shard_ranges(
+ [own_sr,
+ ShardRange('.a/c1', next(ts_iter), 'b', 'c'),
+ ShardRange('.a/c2', next(ts_iter), 'c', 'd')])
+ expected = ShardRange(broker.path, ts_1, 'l', 'u', 0, 0, now)
+ with mock.patch('swift.container.backend.Timestamp.now',
+ return_value=now):
+ actual = broker.get_own_shard_range()
+ self.assertEqual(dict(expected), dict(actual))
+
+ # check stats get updated
+ broker.put_object(
+ 'o1', next(ts_iter).internal, 100, 'text/plain', 'etag1')
+ broker.put_object(
+ 'o2', next(ts_iter).internal, 99, 'text/plain', 'etag2')
+ expected = ShardRange(
+ broker.path, ts_1, 'l', 'u', 2, 199, now)
+ with mock.patch('swift.container.backend.Timestamp.now',
+ return_value=now):
+ actual = broker.get_own_shard_range()
+ self.assertEqual(dict(expected), dict(actual))
+
+ # still returned when deleted
+ delete_ts = next(ts_iter)
+ own_sr.set_deleted(timestamp=delete_ts)
+ broker.merge_shard_ranges(own_sr)
+ with mock.patch('swift.container.backend.Timestamp.now',
+ return_value=now):
+ actual = broker.get_own_shard_range()
+ expected = ShardRange(
+ broker.path, delete_ts, 'l', 'u', 2, 199, now, deleted=True)
+ self.assertEqual(dict(expected), dict(actual))
+
+ # still in table after reclaim_age
+ broker.reclaim(next(ts_iter).internal, next(ts_iter).internal)
+ with mock.patch('swift.container.backend.Timestamp.now',
+ return_value=now):
+ actual = broker.get_own_shard_range()
+ self.assertEqual(dict(expected), dict(actual))
+
+ # entire namespace
+ ts_2 = next(ts_iter)
+ broker.merge_shard_ranges(
+ [ShardRange(broker.path, ts_2, '', '')])
+ expected = ShardRange(
+ broker.path, ts_2, '', '', 2, 199, now)
+ with mock.patch('swift.container.backend.Timestamp.now',
+ return_value=now):
+ actual = broker.get_own_shard_range()
+ self.assertEqual(dict(expected), dict(actual))
+
+ @with_tempdir
+ def test_enable_sharding(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'container.db')
+ broker = ContainerBroker(
+ db_path, account='.shards_a', container='shard_c')
+ broker.initialize(next(ts_iter).internal, 0)
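+ # enable_sharding should merge an own shard range carrying the given
+ # epoch, with matching state_timestamp and the SHARDING state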
+ epoch = next(ts_iter)
+ broker.enable_sharding(epoch)
+ own_sr = broker.get_own_shard_range(no_default=True)
+ self.assertEqual(epoch, own_sr.epoch)
+ self.assertEqual(epoch, own_sr.state_timestamp)
+ self.assertEqual(ShardRange.SHARDING, own_sr.state)
+
+ @with_tempdir
+ def test_get_shard_usage(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ shard_range_by_state = dict(
+ (state, ShardRange('.shards_a/c_%s' % state, next(ts_iter),
+ str(state), str(state + 1),
+ 2 * state, 2 * state + 1, 2,
+ state=state))
+ for state in ShardRange.STATES)
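+ # one shard range per state, with object_count/bytes_used derived from
+ # the state number so the expected sums are easy to compute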
+
+ def make_broker(a, c):
+ db_path = os.path.join(tempdir, '%s.db' % uuid4())
+ broker = ContainerBroker(db_path, account=a, container=c)
+ broker.initialize(next(ts_iter).internal, 0)
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ broker.merge_shard_ranges(shard_range_by_state.values())
+ return broker
+
+ # make broker appear to be a root container
+ broker = make_broker('a', 'c')
+ self.assertTrue(broker.is_root_container())
+ included_states = (ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING)
+ included = [shard_range_by_state[state] for state in included_states]
+ expected = {
+ 'object_count': sum([sr.object_count for sr in included]),
+ 'bytes_used': sum([sr.bytes_used for sr in included])
+ }
+ self.assertEqual(expected, broker.get_shard_usage())
+
+ @with_tempdir
+ def _check_find_shard_ranges(self, c_lower, c_upper, tempdir):
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now()
+ container_name = 'test_container'
+
+ def do_test(expected_bounds, expected_last_found, shard_size, limit,
+ start_index=0, existing=None):
+ # expected_bounds is a list of tuples (lower, upper, object_count)
+ # build expected shard ranges
+ expected_shard_ranges = [
+ dict(lower=lower, upper=upper, index=index,
+ object_count=object_count)
+ for index, (lower, upper, object_count)
+ in enumerate(expected_bounds, start_index)]
+
+ with mock.patch('swift.common.utils.time.time',
+ return_value=float(ts_now.normal)):
+ ranges, last_found = broker.find_shard_ranges(
+ shard_size, limit=limit, existing_ranges=existing)
+ self.assertEqual(expected_shard_ranges, ranges)
+ self.assertEqual(expected_last_found, last_found)
+
+ db_path = os.path.join(tempdir, 'test_container.db')
+ broker = ContainerBroker(
+ db_path, account='a', container=container_name)
+ # shard size > object count, no objects
+ broker.initialize(next(ts_iter).internal, 0)
+
+ ts = next(ts_iter)
+ if c_lower or c_upper:
+ # testing a shard, so set its own shard range
+ own_shard_range = ShardRange(broker.path, ts, c_lower, c_upper)
+ broker.merge_shard_ranges([own_shard_range])
+
+ self.assertEqual(([], False), broker.find_shard_ranges(10))
+
+ for i in range(10):
+ broker.put_object(
+ 'obj%02d' % i, next(ts_iter).internal, 0, 'text/plain', 'etag')
+
+ expected_bounds = [(c_lower, 'obj04', 5), ('obj04', c_upper, 5)]
+ do_test(expected_bounds, True, shard_size=5, limit=None)
+
+ expected = [(c_lower, 'obj06', 7), ('obj06', c_upper, 3)]
+ do_test(expected, True, shard_size=7, limit=None)
+ expected = [(c_lower, 'obj08', 9), ('obj08', c_upper, 1)]
+ do_test(expected, True, shard_size=9, limit=None)
+ # shard size >= object count
+ do_test([], False, shard_size=10, limit=None)
+ do_test([], False, shard_size=11, limit=None)
+
+ # check use of limit
+ do_test([], False, shard_size=4, limit=0)
+ expected = [(c_lower, 'obj03', 4)]
+ do_test(expected, False, shard_size=4, limit=1)
+ expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4)]
+ do_test(expected, False, shard_size=4, limit=2)
+ expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4),
+ ('obj07', c_upper, 2)]
+ do_test(expected, True, shard_size=4, limit=3)
+ do_test(expected, True, shard_size=4, limit=4)
+ do_test(expected, True, shard_size=4, limit=-1)
+
+ # increase object count to 11
+ broker.put_object(
+ 'obj10', next(ts_iter).internal, 0, 'text/plain', 'etag')
+ expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4),
+ ('obj07', c_upper, 3)]
+ do_test(expected, True, shard_size=4, limit=None)
+
+ expected = [(c_lower, 'obj09', 10), ('obj09', c_upper, 1)]
+ do_test(expected, True, shard_size=10, limit=None)
+ do_test([], False, shard_size=11, limit=None)
+
+ # now pass in a pre-existing shard range
+ existing = [ShardRange(
+ '.shards_a/srange-0', Timestamp.now(), '', 'obj03',
+ object_count=4, state=ShardRange.FOUND)]
+
+ expected = [('obj03', 'obj07', 4), ('obj07', c_upper, 3)]
+ do_test(expected, True, shard_size=4, limit=None, start_index=1,
+ existing=existing)
+ expected = [('obj03', 'obj07', 4)]
+ do_test(expected, False, shard_size=4, limit=1, start_index=1,
+ existing=existing)
+ # using increased shard size should not distort estimation of progress
+ expected = [('obj03', 'obj09', 6), ('obj09', c_upper, 1)]
+ do_test(expected, True, shard_size=6, limit=None, start_index=1,
+ existing=existing)
+
+ # add another existing...
+ existing.append(ShardRange(
+ '.shards_a/srange-1', Timestamp.now(), '', 'obj07',
+ object_count=4, state=ShardRange.FOUND))
+ expected = [('obj07', c_upper, 3)]
+ do_test(expected, True, shard_size=10, limit=None, start_index=2,
+ existing=existing)
+ # an existing shard range not in FOUND state should not distort
+ # estimation of progress, but may cause final range object count to
+ # default to shard_size
+ existing[-1].state = ShardRange.CREATED
+ existing[-1].object_count = 10
+ # there are only 3 objects left to scan, but progress cannot be reliably
+ # calculated, so the final shard range has an object count of 2
+ expected = [('obj07', 'obj09', 2), ('obj09', c_upper, 2)]
+ do_test(expected, True, shard_size=2, limit=None, start_index=2,
+ existing=existing)
+
+ # add last shard range so there's none left to find
+ existing.append(ShardRange(
+ '.shards_a/srange-2', Timestamp.now(), 'obj07', c_upper,
+ object_count=4, state=ShardRange.FOUND))
+ do_test([], True, shard_size=4, limit=None, existing=existing)
+
+ def test_find_shard_ranges(self):
+ self._check_find_shard_ranges('', '')
+ self._check_find_shard_ranges('', 'upper')
+ self._check_find_shard_ranges('lower', '')
+ self._check_find_shard_ranges('lower', 'upper')
+
+ @with_tempdir
+ def test_find_shard_ranges_with_misplaced_objects(self, tempdir):
+ # verify that misplaced objects outside of a shard's range do not
+ # influence choice of shard ranges (but do distort the object counts)
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now()
+ container_name = 'test_container'
+
+ db_path = os.path.join(tempdir, 'test_container.db')
+ broker = ContainerBroker(
+ db_path, account='a', container=container_name)
+ # shard size > object count, no objects
+ broker.initialize(next(ts_iter).internal, 0)
+
+ ts = next(ts_iter)
+ own_shard_range = ShardRange(broker.path, ts, 'l', 'u')
+ broker.merge_shard_ranges([own_shard_range])
+
+ self.assertEqual(([], False), broker.find_shard_ranges(10))
+
+ for name in ('a-misplaced', 'm', 'n', 'p', 'q', 'r', 'z-misplaced'):
+ broker.put_object(
+ name, next(ts_iter).internal, 0, 'text/plain', 'etag')
+
+ expected_bounds = (
+ ('l', 'n', 2), # contains m, n
+ ('n', 'q', 2), # contains p, q
+ ('q', 'u', 3) # contains r; object count distorted by 2 misplaced
+ )
+ expected_shard_ranges = [
+ dict(lower=lower, upper=upper, index=index,
+ object_count=object_count)
+ for index, (lower, upper, object_count)
+ in enumerate(expected_bounds)]
+
+ with mock.patch('swift.common.utils.time.time',
+ return_value=float(ts_now.normal)):
+ actual_shard_ranges, last_found = broker.find_shard_ranges(2, -1)
+ self.assertEqual(expected_shard_ranges, actual_shard_ranges)
+
+
+ @with_tempdir
+ def test_find_shard_ranges_errors(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(tempdir, 'test_container.db')
+ broker = ContainerBroker(db_path, account='a', container='c',
+ logger=FakeLogger())
+ broker.initialize(next(ts_iter).internal, 0)
+ for i in range(2):
+ broker.put_object(
+ 'obj%d' % i, next(ts_iter).internal, 0, 'text/plain', 'etag')
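+ # both a lock timeout and a sqlite error while finding the next shard
+ # upper bound should be swallowed, logged once, and produce no ranges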
+
+ klass = 'swift.container.backend.ContainerBroker'
+ with mock.patch(klass + '._get_next_shard_range_upper',
+ side_effect=LockTimeout()):
+ ranges, last_found = broker.find_shard_ranges(1)
+ self.assertFalse(ranges)
+ self.assertFalse(last_found)
+ lines = broker.logger.get_lines_for_level('error')
+ self.assertIn('Problem finding shard upper', lines[0])
+ self.assertFalse(lines[1:])
+
+ broker.logger.clear()
+ with mock.patch(klass + '._get_next_shard_range_upper',
+ side_effect=sqlite3.OperationalError()):
+ ranges, last_found = broker.find_shard_ranges(1)
+ self.assertFalse(last_found)
+ self.assertFalse(ranges)
+ lines = broker.logger.get_lines_for_level('error')
+ self.assertIn('Problem finding shard upper', lines[0])
+ self.assertFalse(lines[1:])
+
+ @with_tempdir
+ def test_set_db_states(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+
+ # load up the broker with some objects
+ objects = [{'name': 'obj_%d' % i,
+ 'created_at': next(ts_iter).normal,
+ 'content_type': 'text/plain',
+ 'etag': 'etag_%d' % i,
+ 'size': 1024 * i,
+ 'deleted': 0,
+ 'storage_policy_index': 0,
+ } for i in range(1, 6)]
+ # merge_items mutates items
+ broker.merge_items([dict(obj) for obj in objects])
+ original_info = broker.get_info()
+
+ # Add some metadata
+ meta = {
+ 'X-Container-Meta-Color': ['Blue', next(ts_iter).normal],
+ 'X-Container-Meta-Cleared': ['', next(ts_iter).normal],
+ 'X-Container-Sysmeta-Shape': ['Circle', next(ts_iter).normal],
+ }
+ broker.update_metadata(meta)
+
+ # Add some syncs
+ incoming_sync = {'remote_id': 'incoming_123', 'sync_point': 1}
+ outgoing_sync = {'remote_id': 'outgoing_123', 'sync_point': 2}
+ broker.merge_syncs([outgoing_sync], incoming=False)
+ broker.merge_syncs([incoming_sync], incoming=True)
+
+ # Add some ShardRanges
+ shard_ranges = [ShardRange(
+ name='.shards_a/shard_range_%s' % i,
+ timestamp=next(ts_iter), lower='obj_%d' % i,
+ upper='obj_%d' % (i + 2),
+ object_count=len(objects[i:i + 2]),
+ bytes_used=sum(obj['size'] for obj in objects[i:i + 2]),
+ meta_timestamp=next(ts_iter)) for i in range(0, 6, 2)]
+ deleted_range = ShardRange('.shards_a/shard_range_z', next(ts_iter),
+ 'z', '', state=ShardRange.SHARDED,
+ deleted=1)
+ own_sr = ShardRange(name='a/c', timestamp=next(ts_iter),
+ state=ShardRange.ACTIVE)
+ broker.merge_shard_ranges([own_sr] + shard_ranges + [deleted_range])
+ ts_epoch = next(ts_iter)
+ new_db_path = os.path.join(tempdir, 'part', 'suffix', 'hash',
+ 'container_%s.db' % ts_epoch.normal)
+
+ def check_broker_properties(broker):
+ # these broker properties should remain unchanged as state changes
+ self.assertEqual(broker.get_max_row(), 5)
+ all_metadata = broker.metadata
+ original_meta = dict((k, all_metadata[k]) for k in meta)
+ self.assertEqual(original_meta, meta)
+ self.assertEqual(broker.get_syncs(True)[0], incoming_sync)
+ self.assertEqual(broker.get_syncs(False)[0], outgoing_sync)
+ self.assertEqual(shard_ranges + [own_sr, deleted_range],
+ broker.get_shard_ranges(include_own=True,
+ include_deleted=True))
+
+ def check_broker_info(actual_info):
+ for key in ('db_state', 'id', 'hash'):
+ actual_info.pop(key, None)
+ original_info.pop(key, None)
+ self.assertEqual(original_info, actual_info)
+
+ def check_unsharded_state(broker):
+ # these are expected properties in unsharded state
+ self.assertEqual(len(broker.get_brokers()), 1)
+ self.assertEqual(broker.get_db_state(), UNSHARDED)
+ self.assertTrue(os.path.exists(db_path))
+ self.assertFalse(os.path.exists(new_db_path))
+ self.assertEqual(objects, broker.get_objects())
+
+ # Sanity checks
+ check_broker_properties(broker)
+ check_unsharded_state(broker)
+ check_broker_info(broker.get_info())
+
+ # first test that moving from UNSHARDED to SHARDED doesn't work
+ self.assertFalse(broker.set_sharded_state())
+ # check nothing changed
+ check_broker_properties(broker)
+ check_broker_info(broker.get_info())
+ check_unsharded_state(broker)
+
+ # cannot go to SHARDING without an epoch set
+ self.assertFalse(broker.set_sharding_state())
+
+ # now set sharding epoch and make sure everything moves.
+ broker.enable_sharding(ts_epoch)
+ self.assertTrue(broker.set_sharding_state())
+ check_broker_properties(broker)
+ check_broker_info(broker.get_info())
+
+ def check_sharding_state(broker):
+ self.assertEqual(len(broker.get_brokers()), 2)
+ self.assertEqual(broker.get_db_state(), SHARDING)
+ self.assertTrue(os.path.exists(db_path))
+ self.assertTrue(os.path.exists(new_db_path))
+ self.assertEqual([], broker.get_objects())
+ self.assertEqual(objects, broker.get_brokers()[0].get_objects())
+ check_sharding_state(broker)
+
+ # to confirm we're definitely looking at the shard db
+ broker2 = ContainerBroker(new_db_path)
+ check_broker_properties(broker2)
+ check_broker_info(broker2.get_info())
+ self.assertEqual([], broker2.get_objects())
+
+ # Try to set sharding state again
+ self.assertFalse(broker.set_sharding_state())
+ # check nothing changed
+ check_broker_properties(broker)
+ check_broker_info(broker.get_info())
+ check_sharding_state(broker)
+
+ # Now move to the final state - update shard ranges' state
+ broker.merge_shard_ranges(
+ [dict(sr, state=ShardRange.ACTIVE,
+ state_timestamp=next(ts_iter).internal)
+ for sr in shard_ranges])
+ # pretend all ranges have been cleaved
+ self.assertTrue(broker.set_sharded_state())
+ check_broker_properties(broker)
+ check_broker_info(broker.get_info())
+
+ def check_sharded_state(broker):
+ self.assertEqual(broker.get_db_state(), SHARDED)
+ self.assertEqual(len(broker.get_brokers()), 1)
+ self.assertFalse(os.path.exists(db_path))
+ self.assertTrue(os.path.exists(new_db_path))
+ self.assertEqual([], broker.get_objects())
+ check_sharded_state(broker)
+
+ # Try to set sharded state again
+ self.assertFalse(broker.set_sharded_state())
+ # check nothing changed
+ check_broker_properties(broker)
+ check_broker_info(broker.get_info())
+ check_sharded_state(broker)
+
+ # delete the container - sharding sysmeta gets erased
+ broker.delete_db(next(ts_iter).internal)
+ # but it is not considered deleted while shards have content
+ self.assertFalse(broker.is_deleted())
+ check_sharded_state(broker)
+ # empty the shard ranges
+ empty_shard_ranges = [sr.copy(object_count=0, bytes_used=0,
+ meta_timestamp=next(ts_iter))
+ for sr in shard_ranges]
+ broker.merge_shard_ranges(empty_shard_ranges)
+ # and now it is deleted
+ self.assertTrue(broker.is_deleted())
+ check_sharded_state(broker)
+
+ def do_revive_shard_delete(shard_ranges):
+ # delete all shard ranges
+ deleted_shard_ranges = [sr.copy(timestamp=next(ts_iter), deleted=1)
+ for sr in shard_ranges]
+ broker.merge_shard_ranges(deleted_shard_ranges)
+ self.assertEqual(COLLAPSED, broker.get_db_state())
+
+ # add new shard ranges and go to sharding state - need to force
+ # broker time to be after the delete time in order to write new
+ # sysmeta
+ broker.enable_sharding(next(ts_iter))
+ shard_ranges = [sr.copy(timestamp=next(ts_iter))
+ for sr in shard_ranges]
+ broker.merge_shard_ranges(shard_ranges)
+ with mock.patch('swift.common.db.time.time',
+ lambda: float(next(ts_iter))):
+ self.assertTrue(broker.set_sharding_state())
+ self.assertEqual(SHARDING, broker.get_db_state())
+
+ # go to sharded
+ self.assertTrue(
+ broker.set_sharded_state())
+ self.assertEqual(SHARDED, broker.get_db_state())
+
+ # delete again
+ broker.delete_db(next(ts_iter).internal)
+ self.assertTrue(broker.is_deleted())
+ self.assertEqual(SHARDED, broker.get_db_state())
+
+ do_revive_shard_delete(shard_ranges)
+ do_revive_shard_delete(shard_ranges)
+
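test_set_db_states above walks a broker through every database state. As a compact reference for the happy path it verifies, here is a hedged sketch assuming a Swift development checkout (the path layout mirrors the test and is illustrative only):

import os
import tempfile

from swift.common.utils import Timestamp
from swift.container.backend import (
    ContainerBroker, UNSHARDED, SHARDING, SHARDED)

db_path = os.path.join(tempfile.mkdtemp(), 'part', 'suffix', 'hash',
                       'container.db')
broker = ContainerBroker(db_path, account='a', container='c')
broker.initialize(Timestamp.now().internal, 0)
assert broker.get_db_state() == UNSHARDED

# an epoch must be set before the broker may move to SHARDING
broker.enable_sharding(Timestamp.now())
assert broker.set_sharding_state()   # fresh container_<epoch>.db is created
assert broker.get_db_state() == SHARDING

# once cleaving is done the retired db is unlinked, leaving only the fresh db
assert broker.set_sharded_state()
assert broker.get_db_state() == SHARDED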
+ @with_tempdir
+ def test_set_sharding_state_errors(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c',
+ logger=FakeLogger())
+ broker.initialize(next(ts_iter).internal, 0)
+ broker.enable_sharding(next(ts_iter))
+
+ orig_execute = GreenDBConnection.execute
+ trigger = 'INSERT into object'
+
+ def mock_execute(conn, *args, **kwargs):
+ if trigger in args[0]:
+ raise sqlite3.OperationalError()
+ return orig_execute(conn, *args, **kwargs)
+
+ with mock.patch('swift.common.db.GreenDBConnection.execute',
+ mock_execute):
+ res = broker.set_sharding_state()
+ self.assertFalse(res)
+ lines = broker.logger.get_lines_for_level('error')
+ self.assertIn('Failed to set the ROWID', lines[0])
+ self.assertFalse(lines[1:])
+
+ broker.logger.clear()
+ trigger = 'UPDATE container_stat SET created_at'
+ with mock.patch('swift.common.db.GreenDBConnection.execute',
+ mock_execute):
+ res = broker.set_sharding_state()
+ self.assertFalse(res)
+ lines = broker.logger.get_lines_for_level('error')
+ self.assertIn('Failed to set matching', lines[0])
+ self.assertFalse(lines[1:])
+
+ @with_tempdir
+ def test_set_sharded_state_errors(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ retiring_db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(retiring_db_path, account='a', container='c',
+ logger=FakeLogger())
+ broker.initialize(next(ts_iter).internal, 0)
+ pre_epoch = next(ts_iter)
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ # unlink fails
+ with mock.patch('os.unlink', side_effect=OSError(errno.EPERM)):
+ self.assertFalse(broker.set_sharded_state())
+ lines = broker.logger.get_lines_for_level('error')
+ self.assertIn('Failed to unlink', lines[0])
+ self.assertFalse(lines[1:])
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ self.assertTrue(os.path.exists(retiring_db_path))
+ self.assertTrue(os.path.exists(broker.db_file))
+
+ # extra files
+ extra_filename = make_db_file_path(broker.db_file, pre_epoch)
+ self.assertNotEqual(extra_filename, broker.db_file) # sanity check
+ with open(extra_filename, 'wb'):
+ pass
+ broker.logger.clear()
+ self.assertFalse(broker.set_sharded_state())
+ lines = broker.logger.get_lines_for_level('warning')
+ self.assertIn('Still have multiple db files', lines[0])
+ self.assertFalse(lines[1:])
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+ self.assertTrue(os.path.exists(retiring_db_path))
+ self.assertTrue(os.path.exists(broker.db_file))
+
+ # retiring file missing
+ broker.logger.clear()
+ os.unlink(retiring_db_path)
+ self.assertFalse(broker.set_sharded_state())
+ lines = broker.logger.get_lines_for_level('warning')
+ self.assertIn('Refusing to delete', lines[0])
+ self.assertFalse(lines[1:])
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+ self.assertTrue(os.path.exists(broker.db_file))
+
+ @with_tempdir
+ def test_get_brokers(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ retiring_db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(retiring_db_path, account='a', container='c',
+ logger=FakeLogger())
+ broker.initialize(next(ts_iter).internal, 0)
+ brokers = broker.get_brokers()
+ self.assertEqual(retiring_db_path, brokers[0].db_file)
+ self.assertFalse(brokers[0].skip_commits)
+ self.assertFalse(brokers[1:])
+
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ brokers = broker.get_brokers()
+ self.assertEqual(retiring_db_path, brokers[0].db_file)
+ self.assertTrue(brokers[0].skip_commits)
+ self.assertEqual(broker.db_file, brokers[1].db_file)
+ self.assertFalse(brokers[1].skip_commits)
+ self.assertFalse(brokers[2:])
+
+ # same outcome when called on retiring db broker
+ brokers = brokers[0].get_brokers()
+ self.assertEqual(retiring_db_path, brokers[0].db_file)
+ self.assertTrue(brokers[0].skip_commits)
+ self.assertEqual(broker.db_file, brokers[1].db_file)
+ self.assertFalse(brokers[1].skip_commits)
+ self.assertFalse(brokers[2:])
+
+ self.assertTrue(broker.set_sharded_state())
+ brokers = broker.get_brokers()
+ self.assertEqual(broker.db_file, brokers[0].db_file)
+ self.assertFalse(brokers[0].skip_commits)
+ self.assertFalse(brokers[1:])
+
+ # unexpected extra file should be ignored
+ with open(retiring_db_path, 'wb'):
+ pass
+ retiring_db_path = broker.db_file
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ broker.reload_db_files()
+ self.assertEqual(3, len(broker.db_files)) # sanity check
+ brokers = broker.get_brokers()
+ self.assertEqual(retiring_db_path, brokers[0].db_file)
+ self.assertTrue(brokers[0].skip_commits)
+ self.assertEqual(broker.db_file, brokers[1].db_file)
+ self.assertFalse(brokers[1].skip_commits)
+ self.assertFalse(brokers[2:])
+ lines = broker.logger.get_lines_for_level('warning')
+ self.assertIn('Unexpected db files', lines[0])
+ self.assertFalse(lines[1:])
+
+ @with_tempdir
+ def test_merge_shard_ranges(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ ts = [next(ts_iter) for _ in range(13)]
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(
+ db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+
+ # sanity check
+ self.assertFalse(broker.get_shard_ranges(include_deleted=True))
+
+ broker.merge_shard_ranges(None)
+ self.assertFalse(broker.get_shard_ranges(include_deleted=True))
+
+ # merge item at ts1
+ # sr___
+ sr_b_1_1 = ShardRange('a/c_b', ts[1], lower='a', upper='b',
+ object_count=2)
+ broker.merge_shard_ranges([sr_b_1_1])
+ self._assert_shard_ranges(broker, [sr_b_1_1])
+
+ # merge older item - ignored
+ sr_b_0_0 = ShardRange('a/c_b', ts[0], lower='a', upper='b',
+ object_count=1)
+ broker.merge_shard_ranges([sr_b_0_0])
+ self._assert_shard_ranges(broker, [sr_b_1_1])
+
+ # merge same timestamp - ignored
+ broker.merge_shard_ranges([dict(sr_b_1_1, lower='', upper='c')])
+ self._assert_shard_ranges(broker, [sr_b_1_1])
+ broker.merge_shard_ranges([dict(sr_b_1_1, object_count=99)])
+ self._assert_shard_ranges(broker, [sr_b_1_1])
+
+ # merge list with older item *after* newer item
+ sr_c_2_2 = ShardRange('a/c_c', ts[2], lower='b', upper='c',
+ object_count=3)
+ sr_c_3_3 = ShardRange('a/c_c', ts[3], lower='b', upper='c',
+ object_count=4)
+ broker.merge_shard_ranges([sr_c_3_3, sr_c_2_2])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_3_3])
+
+ # merge newer item - updated
+ sr_c_5_5 = ShardRange('a/c_c', ts[5], lower='b', upper='c',
+ object_count=5)
+ broker.merge_shard_ranges([sr_c_5_5])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_5])
+
+ # merge older metadata item - ignored
+ sr_c_5_4 = ShardRange('a/c_c', ts[5], lower='b', upper='c',
+ object_count=6, meta_timestamp=ts[4])
+ broker.merge_shard_ranges([sr_c_5_4])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_5])
+
+ # merge newer metadata item - only metadata is updated
+ sr_c_5_6 = ShardRange('a/c_c', ts[5], lower='b', upper='c',
+ object_count=7, meta_timestamp=ts[6])
+ broker.merge_shard_ranges([dict(sr_c_5_6, lower='', upper='d')])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_6])
+
+ # merge older created_at, newer metadata item - ignored
+ sr_c_4_7 = ShardRange('a/c_c', ts[4], lower='b', upper='c',
+ object_count=8, meta_timestamp=ts[7])
+ broker.merge_shard_ranges([sr_c_4_7])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_6])
+
+ # merge list with older metadata item *after* newer metadata item
+ sr_c_5_11 = ShardRange('a/c_c', ts[5], lower='b', upper='c',
+ object_count=9, meta_timestamp=ts[11])
+ broker.merge_shard_ranges([sr_c_5_11, sr_c_5_6])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11])
+
+ # deleted item at *same timestamp* as existing - deleted ignored
+ broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1, object_count=0)])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11])
+ sr_b_1_1.meta_timestamp = ts[11]
+ broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1)])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11])
+ sr_b_1_1.state_timestamp = ts[11]
+ broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1)])
+ self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11])
+
+ # delete item at *newer timestamp* - updated
+ sr_b_2_2_deleted = ShardRange('a/c_b', ts[2], lower='a', upper='b',
+ object_count=0, deleted=1)
+ broker.merge_shard_ranges([sr_b_2_2_deleted])
+ self._assert_shard_ranges(broker, [sr_b_2_2_deleted, sr_c_5_11])
+
+ # merge list with older undeleted item *after* newer deleted item
+ # NB deleted timestamp trumps newer meta timestamp
+ sr_c_9_12 = ShardRange('a/c_c', ts[9], lower='b', upper='c',
+ object_count=10, meta_timestamp=ts[12])
+ sr_c_10_10_deleted = ShardRange('a/c_c', ts[10], lower='b', upper='c',
+ object_count=0, deleted=1)
+ broker.merge_shard_ranges([sr_c_10_10_deleted, sr_c_9_12])
+ self._assert_shard_ranges(
+ broker, [sr_b_2_2_deleted, sr_c_10_10_deleted])
+
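The precedence rules that test_merge_shard_ranges pins down are: a row with a newer timestamp replaces the existing row outright (including deletes), a row with the same timestamp but a newer meta_timestamp updates only the stats metadata, and anything older is ignored. A hedged sketch of the first two rules, using the same ShardRange/ContainerBroker APIs as the test and deliberately fixed timestamps so the ordering is unambiguous:

import os
import tempfile

from swift.common.utils import ShardRange, Timestamp
from swift.container.backend import ContainerBroker

broker = ContainerBroker(os.path.join(tempfile.mkdtemp(), 'container.db'),
                         account='a', container='c')
broker.initialize(Timestamp(1).internal, 0)

older = ShardRange('a/c_x', Timestamp(2), lower='a', upper='b', object_count=1)
newer = ShardRange('a/c_x', Timestamp(3), lower='a', upper='b', object_count=2)
broker.merge_shard_ranges([newer, older])  # order in the list does not matter
assert broker.get_shard_ranges()[0].object_count == 2

# same created-at timestamp, newer meta_timestamp: only the stats are updated
stats_update = ShardRange('a/c_x', Timestamp(3), lower='a', upper='b',
                          object_count=5, meta_timestamp=Timestamp(4))
broker.merge_shard_ranges([stats_update])
assert broker.get_shard_ranges()[0].object_count == 5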
+ @with_tempdir
+ def test_merge_shard_ranges_state(self, tempdir):
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(db_path, account='a', container='c')
+ broker.initialize(next(ts_iter).internal, 0)
+ expected_shard_ranges = []
+
+ def do_test(orig_state, orig_timestamp, test_state, test_timestamp,
+ expected_state, expected_timestamp):
+ index = len(expected_shard_ranges)
+ sr = ShardRange('a/%s' % index, orig_timestamp, '%03d' % index,
+ '%03d' % (index + 1), state=orig_state)
+ broker.merge_shard_ranges([sr])
+ sr.state = test_state
+ sr.state_timestamp = test_timestamp
+ broker.merge_shard_ranges([sr])
+ sr.state = expected_state
+ sr.state_timestamp = expected_timestamp
+ expected_shard_ranges.append(sr)
+ self._assert_shard_ranges(broker, expected_shard_ranges)
+
+ # state at older state_timestamp is not merged
+ for orig_state in ShardRange.STATES:
+ for test_state in ShardRange.STATES:
+ ts_older = next(ts_iter)
+ ts = next(ts_iter)
+ do_test(orig_state, ts, test_state, ts_older, orig_state, ts)
+
+ # more advanced state at same timestamp is merged
+ for orig_state in ShardRange.STATES:
+ for test_state in ShardRange.STATES:
+ ts = next(ts_iter)
+ do_test(orig_state, ts, test_state, ts,
+ test_state if test_state > orig_state else orig_state,
+ ts)
+
+ # any state at newer timestamp is merged
+ for orig_state in ShardRange.STATES:
+ for test_state in ShardRange.STATES:
+ ts = next(ts_iter)
+ ts_newer = next(ts_iter)
+ do_test(orig_state, ts, test_state, ts_newer, test_state,
+ ts_newer)
+
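For the state column specifically, the loops above check a three-way rule. A plain-Python restatement (illustrative only, not the backend implementation):

def merged_state(orig_state, orig_ts, new_state, new_ts):
    # which state survives a merge, per test_merge_shard_ranges_state
    if new_ts < orig_ts:
        return orig_state                  # older state_timestamp: ignored
    if new_ts == orig_ts:
        return max(orig_state, new_state)  # tie: more advanced state wins
    return new_state                       # newer state_timestamp: merged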
+ def _check_object_stats_when_sharded(self, a, c, root_a, root_c, tempdir):
+ # common setup and assertions for root and shard containers
+ ts_iter = make_timestamp_iter()
+ db_path = os.path.join(
+ tempdir, 'part', 'suffix', 'hash', 'container.db')
+ broker = ContainerBroker(
+ db_path, account=a, container=c)
+ broker.initialize(next(ts_iter).internal, 0)
+ broker.set_sharding_sysmeta('Root', '%s/%s' % (root_a, root_c))
+ broker.merge_items([{'name': 'obj', 'size': 14, 'etag': 'blah',
+ 'content_type': 'text/plain', 'deleted': 0,
+ 'created_at': Timestamp.now().internal}])
+ self.assertEqual(1, broker.get_info()['object_count'])
+ self.assertEqual(14, broker.get_info()['bytes_used'])
+
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ sr_1 = ShardRange(
+ '%s/%s1' % (root_a, root_c), Timestamp.now(), lower='', upper='m',
+ object_count=99, bytes_used=999, state=ShardRange.ACTIVE)
+ sr_2 = ShardRange(
+ '%s/%s2' % (root_a, root_c), Timestamp.now(), lower='m', upper='',
+ object_count=21, bytes_used=1000, state=ShardRange.ACTIVE)
+ broker.merge_shard_ranges([sr_1, sr_2])
+ self.assertEqual(1, broker.get_info()['object_count'])
+ self.assertEqual(14, broker.get_info()['bytes_used'])
+ return broker
+
+ @with_tempdir
+ def test_object_stats_root_container(self, tempdir):
+ broker = self._check_object_stats_when_sharded(
+ 'a', 'c', 'a', 'c', tempdir)
+ self.assertTrue(broker.is_root_container()) # sanity
+ self.assertTrue(broker.set_sharded_state())
+ self.assertEqual(120, broker.get_info()['object_count'])
+ self.assertEqual(1999, broker.get_info()['bytes_used'])
+
+ @with_tempdir
+ def test_object_stats_shard_container(self, tempdir):
+ broker = self._check_object_stats_when_sharded(
+ '.shard_a', 'c-blah', 'a', 'c', tempdir)
+ self.assertFalse(broker.is_root_container()) # sanity
+ self.assertTrue(broker.set_sharded_state())
+ self.assertEqual(0, broker.get_info()['object_count'])
+ self.assertEqual(0, broker.get_info()['bytes_used'])
+
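The two object-stats tests above pin down where a sharded broker's totals come from: a sharded root reports the sum over its shard ranges, while a sharded shard container reports zero for its own stats. With the values used in these tests that is simply:

# root container, SHARDED: totals are summed from sr_1 and sr_2
object_count = 99 + 21    # 120, as asserted for get_info()['object_count']
bytes_used = 999 + 1000   # 1999, as asserted for get_info()['bytes_used']
# shard container, SHARDED: get_info() reports 0 objects and 0 bytes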
class TestCommonContainerBroker(test_db.TestExampleBroker):
@@ -2132,6 +4477,8 @@ class ContainerBrokerMigrationMixin(object):
ContainerBroker.create_object_table
ContainerBroker.create_object_table = \
prespi_create_object_table
+ self._imported_create_shard_ranges_table = \
+ ContainerBroker.create_shard_range_table
self._imported_create_container_info_table = \
ContainerBroker.create_container_info_table
ContainerBroker.create_container_info_table = \
@@ -2156,6 +4503,8 @@ class ContainerBrokerMigrationMixin(object):
self._imported_create_container_info_table
ContainerBroker.create_object_table = \
self._imported_create_object_table
+ ContainerBroker.create_shard_range_table = \
+ self._imported_create_shard_ranges_table
ContainerBroker.create_policy_stat_table = \
self._imported_create_policy_stat_table
@@ -2209,6 +4558,8 @@ class TestContainerBrokerBeforeMetadata(ContainerBrokerMigrationMixin,
Tests for ContainerBroker against databases created before
the metadata column was added.
"""
+ expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object',
+ 'sqlite_sequence', 'container_stat', 'shard_range'}
def setUp(self):
super(TestContainerBrokerBeforeMetadata, self).setUp()
@@ -2281,6 +4632,8 @@ class TestContainerBrokerBeforeXSync(ContainerBrokerMigrationMixin,
Tests for ContainerBroker against databases created
before the x_container_sync_point[12] columns were added.
"""
+ expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object',
+ 'sqlite_sequence', 'container_stat', 'shard_range'}
def setUp(self):
super(TestContainerBrokerBeforeXSync, self).setUp()
@@ -2395,6 +4748,8 @@ class TestContainerBrokerBeforeSPI(ContainerBrokerMigrationMixin,
Tests for ContainerBroker against databases created
before the storage_policy_index column was added.
"""
+ expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object',
+ 'sqlite_sequence', 'container_stat', 'shard_range'}
def setUp(self):
super(TestContainerBrokerBeforeSPI, self).setUp()
@@ -2599,6 +4954,48 @@ class TestContainerBrokerBeforeSPI(ContainerBrokerMigrationMixin,
self.assertEqual(info['bytes_used'], 456)
+class TestContainerBrokerBeforeShardRanges(ContainerBrokerMigrationMixin,
+ TestContainerBroker):
+ """
+ Tests for ContainerBroker against databases created
+ before the shard_ranges table was added.
+ """
+ expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object',
+ 'sqlite_sequence', 'container_stat'}
+
+ class Override(object):
+ def __init__(self, func):
+ self.func = func
+
+ def __get__(self, obj, obj_type):
+ if inspect.stack()[1][3] == '_initialize':
+ return lambda *a, **kw: None
+ return self.func.__get__(obj, obj_type)
+
+ def setUp(self):
+ super(TestContainerBrokerBeforeShardRanges, self).setUp()
+ ContainerBroker.create_shard_range_table = self.Override(
+ ContainerBroker.create_shard_range_table)
+ broker = ContainerBroker(':memory:', account='a', container='c')
+ broker.initialize(Timestamp('1').internal, 0)
+ exc = None
+ with broker.get() as conn:
+ try:
+ conn.execute('''SELECT *
+ FROM shard_range''')
+ except BaseException as err:
+ exc = err
+ self.assertIn('no such table: shard_range', str(exc))
+
+ def tearDown(self):
+ super(TestContainerBrokerBeforeShardRanges, self).tearDown()
+ broker = ContainerBroker(':memory:', account='a', container='c')
+ broker.initialize(Timestamp('1').internal, 0)
+ with broker.get() as conn:
+ conn.execute('''SELECT *
+ FROM shard_range''')
+
+
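TestContainerBrokerBeforeShardRanges relies on the Override descriptor to skip creating the shard_range table, but only when the call originates from _initialize, so every other caller still gets the real method. A generalised sketch of that trick (the class name below is made up and not part of the patch):

import inspect

class SkipWhenCalledFrom(object):
    # descriptor that swaps in a no-op only for one named caller
    def __init__(self, func, caller_name='_initialize'):
        self.func = func
        self.caller_name = caller_name

    def __get__(self, obj, obj_type):
        if inspect.stack()[1][3] == self.caller_name:
            return lambda *a, **kw: None
        return self.func.__get__(obj, obj_type)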
class TestUpdateNewItemFromExisting(unittest.TestCase):
# TODO: add test scenarios that have swift_bytes in content_type
t0 = '1234567890.00000'
diff --git a/test/unit/container/test_replicator.py b/test/unit/container/test_replicator.py
index ff63a2992c..23f06ddc97 100644
--- a/test/unit/container/test_replicator.py
+++ b/test/unit/container/test_replicator.py
@@ -26,13 +26,17 @@ from swift.common import db_replicator
from swift.container import replicator, backend, server, sync_store
from swift.container.reconciler import (
MISPLACED_OBJECTS_ACCOUNT, get_reconciler_container_name)
-from swift.common.utils import Timestamp, encode_timestamps
+from swift.common.utils import Timestamp, encode_timestamps, ShardRange, \
+ get_db_files, make_db_file_path
from swift.common.storage_policy import POLICIES
from test.unit.common import test_db_replicator
-from test.unit import patch_policies, make_timestamp_iter, mock_check_drive
+from test.unit import patch_policies, make_timestamp_iter, mock_check_drive, \
+ debug_logger
from contextlib import contextmanager
+from test.unit.common.test_db_replicator import attach_fake_replication_rpc
+
@patch_policies
class TestReplicatorSync(test_db_replicator.TestReplicatorSync):
@@ -42,6 +46,16 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync):
replicator_daemon = replicator.ContainerReplicator
replicator_rpc = replicator.ContainerReplicatorRpc
+ def assertShardRangesEqual(self, x, y):
+ # ShardRange.__eq__ only compares lower and upper; here we generate
+ # dict representations to compare all attributes
+ self.assertEqual([dict(sr) for sr in x], [dict(sr) for sr in y])
+
+ def assertShardRangesNotEqual(self, x, y):
+ # ShardRange.__eq__ only compares lower and upper; here we generate
+ # dict representations to compare all attributes
+ self.assertNotEqual([dict(sr) for sr in x], [dict(sr) for sr in y])
+
def test_report_up_to_date(self):
broker = self._get_broker('a', 'c', node_index=0)
broker.initialize(Timestamp(1).internal, int(POLICIES.default))
@@ -1148,6 +1162,1037 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync):
self.assertEqual(1, mock_remove.call_count)
self.assertEqual(broker_2.db_file, mock_remove.call_args[0][0].db_file)
+ def test_cleanup_post_replicate(self):
+ broker = self._get_broker('a', 'c', node_index=0)
+ put_timestamp = Timestamp.now()
+ broker.initialize(put_timestamp.internal, POLICIES.default.idx)
+ orig_info = broker.get_replication_info()
+ daemon = replicator.ContainerReplicator({}, logger=self.logger)
+
+ # db should not be here, replication ok, deleted
+ res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
+ self.assertTrue(res)
+ self.assertFalse(os.path.exists(broker.db_file))
+ self.assertEqual(['Successfully deleted db %s' % broker.db_file],
+ daemon.logger.get_lines_for_level('debug'))
+ daemon.logger.clear()
+
+ # failed replication, not deleted
+ broker.initialize(put_timestamp.internal, POLICIES.default.idx)
+ orig_info = broker.get_replication_info()
+ res = daemon.cleanup_post_replicate(broker, orig_info,
+ [False, True, True])
+ self.assertTrue(res)
+ self.assertTrue(os.path.exists(broker.db_file))
+ self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file],
+ daemon.logger.get_lines_for_level('debug'))
+ daemon.logger.clear()
+
+ # db has shard ranges, not deleted
+ broker.enable_sharding(Timestamp.now())
+ broker.merge_shard_ranges(
+ [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')])
+ self.assertTrue(broker.sharding_required()) # sanity check
+ res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
+ self.assertTrue(res)
+ self.assertTrue(os.path.exists(broker.db_file))
+ self.assertEqual(
+ ['Not deleting db %s (requires sharding, state unsharded)' %
+ broker.db_file],
+ daemon.logger.get_lines_for_level('debug'))
+ daemon.logger.clear()
+
+ # db sharding, not deleted
+ self._goto_sharding_state(broker, Timestamp.now())
+ self.assertTrue(broker.sharding_required()) # sanity check
+ orig_info = broker.get_replication_info()
+ res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
+ self.assertTrue(res)
+ self.assertTrue(os.path.exists(broker.db_file))
+ self.assertEqual(
+ ['Not deleting db %s (requires sharding, state sharding)' %
+ broker.db_file],
+ daemon.logger.get_lines_for_level('debug'))
+ daemon.logger.clear()
+
+ # db sharded, should not be here, failed replication, not deleted
+ self._goto_sharded_state(broker)
+ self.assertFalse(broker.sharding_required()) # sanity check
+ res = daemon.cleanup_post_replicate(broker, orig_info,
+ [True, False, True])
+ self.assertTrue(res)
+ self.assertTrue(os.path.exists(broker.db_file))
+ self.assertEqual(['Not deleting db %s (2/3 success)' %
+ broker.db_file],
+ daemon.logger.get_lines_for_level('debug'))
+ daemon.logger.clear()
+
+ # db sharded, should not be here, new shard ranges (e.g. from reverse
+ # replication), deleted
+ broker.merge_shard_ranges(
+ [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')])
+ res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
+ self.assertTrue(res)
+ self.assertFalse(os.path.exists(broker.db_file))
+ daemon.logger.clear()
+
+ # db sharded, should not be here, replication ok, deleted
+ broker.initialize(put_timestamp.internal, POLICIES.default.idx)
+ self.assertTrue(os.path.exists(broker.db_file))
+ orig_info = broker.get_replication_info()
+ res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
+ self.assertTrue(res)
+ self.assertFalse(os.path.exists(broker.db_file))
+ self.assertEqual(['Successfully deleted db %s' % broker.db_file],
+ daemon.logger.get_lines_for_level('debug'))
+ daemon.logger.clear()
+
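test_cleanup_post_replicate encodes when a handoff db may be removed after replication. A hedged restatement of the conditions the assertions imply (not the actual ContainerReplicator code):

def may_delete_handoff(replication_results, broker):
    # every peer must have accepted the replication...
    if not (replication_results and all(replication_results)):
        return False
    # ...and the broker must not still owe sharding work; unsharded or
    # sharding dbs that carry shard ranges are kept
    return not broker.sharding_required()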
+ def test_sync_shard_ranges(self):
+ put_timestamp = Timestamp.now().internal
+ # create "local" broker
+ broker = self._get_broker('a', 'c', node_index=0)
+ broker.initialize(put_timestamp, POLICIES.default.idx)
+ # create "remote" broker
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ remote_broker.initialize(put_timestamp, POLICIES.default.idx)
+
+ def check_replicate(expected_shard_ranges, from_broker, to_broker):
+ daemon = replicator.ContainerReplicator({})
+ part, node = self._get_broker_part_node(to_broker)
+ info = from_broker.get_replication_info()
+ success = daemon._repl_to_node(node, from_broker, part, info)
+ self.assertTrue(success)
+ self.assertEqual(
+ expected_shard_ranges,
+ to_broker.get_all_shard_range_data()
+ )
+ self.assertEqual(1, daemon.stats['deferred'])
+ self.assertEqual(0, daemon.stats['rsync'])
+ self.assertEqual(0, daemon.stats['diff'])
+ local_info = self._get_broker(
+ 'a', 'c', node_index=0).get_info()
+ remote_info = self._get_broker(
+ 'a', 'c', node_index=1).get_info()
+ for k, v in local_info.items():
+ if k == 'id':
+ continue
+ self.assertEqual(remote_info[k], v,
+ "mismatch remote %s %r != %r" % (
+ k, remote_info[k], v))
+
+ bounds = (('', 'g'), ('g', 'r'), ('r', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower,
+ upper, i + 1, 10 * (i + 1))
+ for i, (lower, upper) in enumerate(bounds)
+ ]
+ # add first two shard_ranges to both brokers
+ for shard_range in shard_ranges[:2]:
+ for db in (broker, remote_broker):
+ db.merge_shard_ranges(shard_range)
+ # now add a shard range to the "local" broker only
+ own_sr = broker.enable_sharding(Timestamp.now())
+ broker.merge_shard_ranges(shard_ranges[2])
+ broker_ranges = broker.get_all_shard_range_data()
+ self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
+ check_replicate(broker_ranges, broker, remote_broker)
+
+ # update one shard range
+ shard_ranges[1].update_meta(99, 0)
+ broker.merge_shard_ranges(shard_ranges[1])
+ # sanity check
+ broker_ranges = broker.get_all_shard_range_data()
+ self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
+ check_replicate(broker_ranges, broker, remote_broker)
+
+ # delete one shard range
+ shard_ranges[0].deleted = 1
+ shard_ranges[0].timestamp = Timestamp.now()
+ broker.merge_shard_ranges(shard_ranges[0])
+ # sanity check
+ broker_ranges = broker.get_all_shard_range_data()
+ self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
+ check_replicate(broker_ranges, broker, remote_broker)
+
+ # put a shard range again
+ shard_ranges[2].timestamp = Timestamp.now()
+ shard_ranges[2].object_count = 0
+ broker.merge_shard_ranges(shard_ranges[2])
+ # sanity check
+ broker_ranges = broker.get_all_shard_range_data()
+ self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
+ check_replicate(broker_ranges, broker, remote_broker)
+
+ # update same shard range on local and remote, remote later
+ shard_ranges[-1].meta_timestamp = Timestamp.now()
+ shard_ranges[-1].bytes_used += 1000
+ broker.merge_shard_ranges(shard_ranges[-1])
+ remote_shard_ranges = remote_broker.get_shard_ranges(
+ include_deleted=True)
+ remote_shard_ranges[-1].meta_timestamp = Timestamp.now()
+ remote_shard_ranges[-1].bytes_used += 2000
+ remote_broker.merge_shard_ranges(remote_shard_ranges[-1])
+ # sanity check
+ remote_broker_ranges = remote_broker.get_all_shard_range_data()
+ self.assertShardRangesEqual(remote_shard_ranges + [own_sr],
+ remote_broker_ranges)
+ self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges)
+ check_replicate(remote_broker_ranges, broker, remote_broker)
+
+ # undelete shard range *on the remote*
+ deleted_ranges = [sr for sr in remote_shard_ranges if sr.deleted]
+ self.assertEqual([shard_ranges[0]], deleted_ranges)
+ deleted_ranges[0].deleted = 0
+ deleted_ranges[0].timestamp = Timestamp.now()
+ remote_broker.merge_shard_ranges(deleted_ranges[0])
+ # sanity check
+ remote_broker_ranges = remote_broker.get_all_shard_range_data()
+ self.assertShardRangesEqual(remote_shard_ranges + [own_sr],
+ remote_broker_ranges)
+ self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges)
+ check_replicate(remote_broker_ranges, broker, remote_broker)
+
+ # reverse replication direction and expect syncs to propagate
+ check_replicate(remote_broker_ranges, remote_broker, broker)
+
+ def test_sync_shard_ranges_with_rsync(self):
+ broker = self._get_broker('a', 'c', node_index=0)
+ put_timestamp = time.time()
+ broker.initialize(put_timestamp, POLICIES.default.idx)
+
+ bounds = (('', 'g'), ('g', 'r'), ('r', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower,
+ upper, i + 1, 10 * (i + 1))
+ for i, (lower, upper) in enumerate(bounds)
+ ]
+ # add first shard range
+ own_sr = broker.enable_sharding(Timestamp.now())
+ broker.merge_shard_ranges(shard_ranges[:1])
+
+ # "replicate"
+ part, node = self._get_broker_part_node(broker)
+ daemon = self._run_once(node)
+ self.assertEqual(2, daemon.stats['rsync'])
+
+ # complete rsync to all other nodes
+ def check_replicate(expected_ranges):
+ for i in range(1, 3):
+ remote_broker = self._get_broker('a', 'c', node_index=i)
+ self.assertTrue(os.path.exists(remote_broker.db_file))
+ self.assertShardRangesEqual(
+ expected_ranges,
+ remote_broker.get_shard_ranges(include_deleted=True,
+ include_own=True)
+ )
+ remote_info = remote_broker.get_info()
+ local_info = self._get_broker(
+ 'a', 'c', node_index=0).get_info()
+ for k, v in local_info.items():
+ if k == 'id':
+ continue
+ if k == 'hash':
+ self.assertEqual(remote_info[k], '0' * 32)
+ continue
+ if k == 'object_count':
+ self.assertEqual(remote_info[k], 0)
+ continue
+ self.assertEqual(remote_info[k], v,
+ "mismatch remote %s %r != %r" % (
+ k, remote_info[k], v))
+
+ check_replicate([shard_ranges[0], own_sr])
+
+ # delete and add some more shard ranges
+ shard_ranges[0].deleted = 1
+ shard_ranges[0].timestamp = Timestamp.now()
+ for shard_range in shard_ranges:
+ broker.merge_shard_ranges(shard_range)
+ daemon = self._run_once(node)
+ self.assertEqual(2, daemon.stats['deferred'])
+ check_replicate(shard_ranges + [own_sr])
+
+ def check_replicate(self, from_broker, remote_node_index, repl_conf=None,
+ expect_success=True, errors=None):
+ repl_conf = repl_conf or {}
+ repl_calls = []
+ rsync_calls = []
+
+ def repl_hook(op, *sync_args):
+ repl_calls.append((op, sync_args))
+
+ fake_repl_connection = attach_fake_replication_rpc(
+ self.rpc, replicate_hook=repl_hook, errors=errors)
+ db_replicator.ReplConnection = fake_repl_connection
+ daemon = replicator.ContainerReplicator(
+ repl_conf, logger=debug_logger())
+ self._install_fake_rsync_file(daemon, rsync_calls)
+ part, nodes = self._ring.get_nodes(from_broker.account,
+ from_broker.container)
+
+ def find_node(node_index):
+ for node in nodes:
+ if node['index'] == node_index:
+ return node
+ else:
+ self.fail('Failed to find node index %s' % node_index)
+
+ remote_node = find_node(remote_node_index)
+ info = from_broker.get_replication_info()
+ success = daemon._repl_to_node(remote_node, from_broker, part, info)
+ self.assertEqual(expect_success, success)
+ return daemon, repl_calls, rsync_calls
+
+ def assert_synced_shard_ranges(self, expected, synced_items):
+ expected.sort(key=lambda sr: (sr.lower, sr.upper))
+ for item in synced_items:
+ item.pop('record_type', None)
+ self.assertEqual([dict(ex) for ex in expected], synced_items)
+
+ def assert_info_synced(self, local, remote_node_index, mismatches=None):
+ mismatches = mismatches or []
+ mismatches.append('id')
+ remote = self._get_broker(local.account, local.container,
+ node_index=remote_node_index)
+ local_info = local.get_info()
+ remote_info = remote.get_info()
+ errors = []
+ for k, v in local_info.items():
+ if remote_info.get(k) == v:
+ if k in mismatches:
+ errors.append(
+ "unexpected match remote %s %r == %r" % (
+ k, remote_info[k], v))
+ continue
+ else:
+ if k not in mismatches:
+ errors.append(
+ "unexpected mismatch remote %s %r != %r" % (
+ k, remote_info[k], v))
+ if errors:
+ self.fail('Found sync errors:\n' + '\n'.join(errors))
+
+ def assert_shard_ranges_synced(self, local_broker, remote_broker):
+ self.assertShardRangesEqual(
+ local_broker.get_shard_ranges(include_deleted=True,
+ include_own=True),
+ remote_broker.get_shard_ranges(include_deleted=True,
+ include_own=True)
+ )
+
+ def _setup_replication_test(self, node_index):
+ ts_iter = make_timestamp_iter()
+ policy_idx = POLICIES.default.idx
+ put_timestamp = Timestamp.now().internal
+ # create "local" broker
+ broker = self._get_broker('a', 'c', node_index=node_index)
+ broker.initialize(put_timestamp, policy_idx)
+
+ objs = [{'name': 'blah%03d' % i, 'created_at': next(ts_iter).internal,
+ 'size': i, 'content_type': 'text/plain', 'etag': 'etag%s' % i,
+ 'deleted': 0, 'storage_policy_index': policy_idx}
+ for i in range(20)]
+ bounds = (('', 'a'), ('a', 'b'), ('b', 'c'), ('c', ''))
+ shard_ranges = [
+ ShardRange(
+ '.sharded_a/sr-%s' % upper, Timestamp.now(), lower, upper)
+ for i, (lower, upper) in enumerate(bounds)
+ ]
+ return {'broker': broker,
+ 'objects': objs,
+ 'shard_ranges': shard_ranges}
+
+ def _merge_object(self, broker, objects, index, **kwargs):
+ if not isinstance(index, slice):
+ index = slice(index, index + 1)
+ objs = [dict(obj) for obj in objects[index]]
+ broker.merge_items(objs)
+
+ def _merge_shard_range(self, broker, shard_ranges, index, **kwargs):
+ broker.merge_shard_ranges(shard_ranges[index:index + 1])
+
+ def _goto_sharding_state(self, broker, epoch):
+ broker.enable_sharding(epoch)
+ self.assertTrue(broker.set_sharding_state())
+ self.assertEqual(backend.SHARDING, broker.get_db_state())
+
+ def _goto_sharded_state(self, broker):
+ self.assertTrue(broker.set_sharded_state())
+ self.assertEqual(backend.SHARDED, broker.get_db_state())
+
+ def _assert_local_sharded_in_sync(self, local_broker, local_id):
+ daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1)
+ self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(1, daemon.stats['deferred'])
+ self.assertEqual(0, daemon.stats['rsync'])
+ self.assertEqual(0, daemon.stats['diff'])
+ self.assertFalse(rsync_calls)
+ # new db sync
+ self.assertEqual(local_id, repl_calls[0][1][2])
+ # ...but we still get a merge_shard_ranges for shard ranges
+ self.assert_synced_shard_ranges(
+ local_broker.get_shard_ranges(include_own=True),
+ repl_calls[2][1][0])
+ self.assertEqual(local_id, repl_calls[2][1][1])
+
+ def _check_only_shard_ranges_replicated(self, local_broker,
+ remote_node_index,
+ repl_conf,
+ expected_shard_ranges,
+ expect_success=True):
+ # expected_shard_ranges is expected final list of sync'd ranges
+ daemon, repl_calls, rsync_calls = self.check_replicate(
+ local_broker, remote_node_index, repl_conf,
+ expect_success=expect_success)
+
+ # we always expect only shard ranges to be sync'd before the abort
+ self.assertEqual(1, daemon.stats['deferred'])
+ self.assertEqual(0, daemon.stats['diff'])
+ self.assertEqual(0, daemon.stats['rsync'])
+ self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'],
+ [call[0] for call in repl_calls])
+ self.assertFalse(rsync_calls)
+ # sync
+ local_id = local_broker.get_info()['id']
+ self.assertEqual(local_id, repl_calls[0][1][2])
+ # get_shard_ranges
+ self.assertEqual((), repl_calls[1][1])
+ # merge_shard_ranges for sending local shard ranges
+ self.assertShardRangesEqual(expected_shard_ranges, repl_calls[2][1][0])
+ self.assertEqual(local_id, repl_calls[2][1][1])
+ remote_broker = self._get_broker(
+ local_broker.account, local_broker.container, node_index=1)
+ self.assertNotEqual(local_id, remote_broker.get_info()['id'])
+ self.assert_shard_ranges_synced(remote_broker, local_broker)
+
+ def test_replication_local_unsharded_remote_missing(self):
+ context = self._setup_replication_test(0)
+ local_broker = context['broker']
+ local_id = local_broker.get_info()['id']
+ objs = context['objects']
+ self._merge_object(index=0, **context)
+
+ daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1)
+
+ self.assert_info_synced(local_broker, 1)
+ self.assertEqual(1, daemon.stats['rsync'])
+ self.assertEqual(['sync', 'complete_rsync'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(local_id, repl_calls[1][1][0])
+ self.assertEqual(os.path.basename(local_broker.db_file),
+ repl_calls[1][1][1])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(local_id, os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ self.assert_shard_ranges_synced(local_broker, remote_broker)
+ self.assertTrue(os.path.exists(remote_broker._db_file))
+ self.assertNotEqual(local_id, remote_broker.get_info()['id'])
+ self.assertEqual(objs[:1], remote_broker.get_objects())
+
+ def _check_replication_local_unsharded_remote_sharded(self, repl_conf):
+ context = self._setup_replication_test(0)
+ local_broker = context['broker']
+ local_id = local_broker.get_info()['id']
+ self._merge_object(index=slice(0, 6), **context)
+
+ remote_context = self._setup_replication_test(1)
+ self._merge_object(index=4, **remote_context)
+ remote_broker = remote_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(remote_broker, epoch=epoch)
+ remote_context['shard_ranges'][0].object_count = 101
+ remote_context['shard_ranges'][0].bytes_used = 1010
+ remote_context['shard_ranges'][0].state = ShardRange.ACTIVE
+ self._merge_shard_range(index=0, **remote_context)
+ self._merge_object(index=5, **remote_context)
+ self._goto_sharded_state(remote_broker)
+ self.assertEqual(backend.SHARDED, remote_broker.get_db_state())
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ remote_broker.get_shard_ranges(include_own=True))
+
+ remote_broker = self._get_broker(
+ local_broker.account, local_broker.container, node_index=1)
+ self.assertEqual(backend.SHARDED, remote_broker.get_db_state())
+ self.assertFalse(os.path.exists(remote_broker._db_file))
+ self.assertNotEqual(local_id, remote_broker.get_info()['id'])
+ self.assertEqual(remote_context['objects'][5:6],
+ remote_broker.get_objects())
+
+ # Now that we have shard ranges, we're never considered in-sync :-/
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ remote_broker.get_shard_ranges(include_own=True))
+
+ def test_replication_local_unsharded_remote_sharded(self):
+ self._check_replication_local_unsharded_remote_sharded({})
+
+ def test_replication_local_unsharded_remote_sharded_large_diff(self):
+ self._check_replication_local_unsharded_remote_sharded({'per_diff': 1})
+
+ def _check_replication_local_sharding_remote_missing(self, repl_conf):
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ self._merge_object(index=0, **local_context)
+ self._merge_object(index=1, **local_context)
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch)
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(2, 8), **local_context)
+ objs = local_context['objects']
+
+ daemon, repl_calls, rsync_calls = self.check_replicate(
+ local_broker, 1, repl_conf=repl_conf)
+
+ self.assertEqual(['sync', 'complete_rsync'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(1, daemon.stats['rsync'])
+ self.assertEqual(0, daemon.stats['deferred'])
+ self.assertEqual(0, daemon.stats['diff'])
+
+ # fresh db is sync'd first...
+ fresh_id = local_broker.get_info()['id']
+ self.assertEqual(fresh_id, repl_calls[0][1][2])
+ self.assertEqual(fresh_id, repl_calls[1][1][0])
+ # retired db is not sync'd at all
+ old_broker = self.backend(
+ local_broker._db_file, account=local_broker.account,
+ container=local_broker.container, force_db_file=True)
+ old_id = old_broker.get_info()['id']
+ bad_calls = []
+ for call in repl_calls:
+ if old_id in call[1]:
+ bad_calls.append(
+ 'old db id %r in %r call args %r' % (
+ old_id, call[0], call[1]))
+ if bad_calls:
+ self.fail('Found some bad calls:\n' + '\n'.join(bad_calls))
+ # complete_rsync
+ self.assertEqual(os.path.basename(local_broker.db_file),
+ repl_calls[1][1][1])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(fresh_id, os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+
+ # TODO: make these stats better; in sharding state local broker pulls
+ # stats for 2 objects from old db, whereas remote thinks it's sharded
+ # and has an empty shard range table
+ self.assert_info_synced(local_broker, 1, mismatches=[
+ 'object_count', 'bytes_used', 'db_state'])
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ remote_id = remote_broker.get_info()['id']
+ self.assertNotEqual(old_id, remote_id)
+ self.assertNotEqual(fresh_id, remote_id)
+ self.assertEqual(
+ [remote_broker.db_file], get_db_files(remote_broker.db_file))
+ self.assertEqual(os.path.basename(remote_broker.db_file),
+ os.path.basename(local_broker.db_file))
+ self.assertEqual(epoch, remote_broker.db_epoch)
+ # remote db has only the misplaced objects
+ self.assertEqual(objs[2:8], remote_broker.get_objects())
+ self.assert_shard_ranges_synced(local_broker, remote_broker)
+
+ # replicate again, check asserts abort
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ local_broker.get_shard_ranges(include_own=True))
+
+ # sanity
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ self.assertEqual(
+ [remote_broker.db_file], get_db_files(remote_broker.db_file))
+ self.assertEqual(os.path.basename(remote_broker.db_file),
+ os.path.basename(local_broker.db_file))
+ self.assertEqual(objs[2:8], remote_broker.get_objects())
+ self.assertEqual(epoch, remote_broker.db_epoch)
+
+ def test_replication_local_sharding_remote_missing(self):
+ self._check_replication_local_sharding_remote_missing({})
+
+ def test_replication_local_sharding_remote_missing_large_diff(self):
+ # the local shard db has large diff with respect to the old db
+ self._check_replication_local_sharding_remote_missing({'per_diff': 1})
+
+ def _check_replication_local_sharding_remote_unsharded(self, repl_conf):
+ local_context = self._setup_replication_test(0)
+ self._merge_object(index=slice(0, 3), **local_context)
+ local_broker = local_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch)
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(3, 11), **local_context)
+
+ remote_context = self._setup_replication_test(1)
+ self._merge_object(index=11, **remote_context)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ local_broker.get_shard_ranges(include_own=True))
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ self.assertEqual(
+ [remote_broker._db_file], get_db_files(remote_broker.db_file))
+ self.assertEqual(remote_context['objects'][11:12],
+ remote_broker.get_objects())
+
+ self.assert_info_synced(
+ local_broker, 1,
+ mismatches=['db_state', 'object_count', 'bytes_used',
+ 'status_changed_at', 'hash'])
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ local_broker.get_shard_ranges(include_own=True))
+
+ def test_replication_local_sharding_remote_unsharded(self):
+ self._check_replication_local_sharding_remote_unsharded({})
+
+ def test_replication_local_sharding_remote_unsharded_large_diff(self):
+ self._check_replication_local_sharding_remote_unsharded(
+ {'per_diff': 1})
+
+ def _check_replication_local_sharding_remote_sharding(self, repl_conf):
+ local_context = self._setup_replication_test(0)
+ self._merge_object(index=slice(0, 5), **local_context)
+ local_broker = local_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch)
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(5, 10), **local_context)
+
+ remote_context = self._setup_replication_test(1)
+ self._merge_object(index=12, **remote_context)
+ # take snapshot of info now before transition to sharding...
+ orig_remote_info = remote_context['broker'].get_info()
+ remote_broker = remote_context['broker']
+ self._goto_sharding_state(remote_broker, epoch)
+ self._merge_shard_range(index=0, **remote_context)
+ self._merge_object(index=13, **remote_context)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ remote_broker.get_shard_ranges(include_own=True))
+
+ # in sharding state brokers only report object stats from the old db, and
+ # they are different
+ self.assert_info_synced(
+ local_broker, 1, mismatches=['object_count', 'bytes_used',
+ 'status_changed_at', 'hash'])
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ shard_db = make_db_file_path(remote_broker._db_file, epoch)
+ self.assertEqual([remote_broker._db_file, shard_db],
+ get_db_files(remote_broker.db_file))
+ # no local objects have been sync'd to remote shard db
+ self.assertEqual(remote_context['objects'][13:14],
+ remote_broker.get_objects())
+ # remote *old db* is unchanged
+ remote_old_broker = self.backend(
+ remote_broker._db_file, account=remote_broker.account,
+ container=remote_broker.container, force_db_file=True)
+ self.assertEqual(remote_context['objects'][12:13],
+ remote_old_broker.get_objects())
+ self.assertFalse(remote_old_broker.get_shard_ranges())
+ remote_old_info = remote_old_broker.get_info()
+ orig_remote_info.pop('db_state')
+ remote_old_info.pop('db_state')
+ self.assertEqual(orig_remote_info, remote_old_info)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ local_broker.get_shard_ranges(include_own=True))
+
+ def test_replication_local_sharding_remote_sharding(self):
+ self._check_replication_local_sharding_remote_sharding({})
+
+ def test_replication_local_sharding_remote_sharding_large_diff(self):
+ self._check_replication_local_sharding_remote_sharding({'per_diff': 1})
+
+ def test_replication_local_sharded_remote_missing(self):
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch)
+ local_context['shard_ranges'][0].object_count = 99
+ local_context['shard_ranges'][0].state = ShardRange.ACTIVE
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(0, 3), **local_context)
+ self._goto_sharded_state(local_broker)
+ objs = local_context['objects']
+
+ daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1)
+
+ self.assertEqual(['sync', 'complete_rsync'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(1, daemon.stats['rsync'])
+
+ # sync
+ local_id = local_broker.get_info()['id']
+ self.assertEqual(local_id, repl_calls[0][1][2])
+ # complete_rsync
+ self.assertEqual(local_id, repl_calls[1][1][0])
+ self.assertEqual(
+ os.path.basename(local_broker.db_file), repl_calls[1][1][1])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(local_id, os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+
+ self.assert_info_synced(local_broker, 1)
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ remote_id = remote_broker.get_info()['id']
+ self.assertNotEqual(local_id, remote_id)
+ shard_db = make_db_file_path(remote_broker._db_file, epoch)
+ self.assertEqual([shard_db],
+ get_db_files(remote_broker.db_file))
+ self.assertEqual(objs[:3], remote_broker.get_objects())
+ self.assertEqual(local_broker.get_shard_ranges(),
+ remote_broker.get_shard_ranges())
+
+ # sanity check - in sync
+ self._assert_local_sharded_in_sync(local_broker, local_id)
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ shard_db = make_db_file_path(remote_broker._db_file, epoch)
+ self.assertEqual([shard_db],
+ get_db_files(remote_broker.db_file))
+ # the remote broker object_count comes from replicated shard range...
+ self.assertEqual(99, remote_broker.get_info()['object_count'])
+ # these are replicated misplaced objects...
+ self.assertEqual(objs[:3], remote_broker.get_objects())
+ self.assertEqual(local_broker.get_shard_ranges(),
+ remote_broker.get_shard_ranges())
+
+ def _check_replication_local_sharded_remote_unsharded(self, repl_conf):
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch)
+ local_context['shard_ranges'][0].object_count = 99
+ local_context['shard_ranges'][0].state = ShardRange.ACTIVE
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(0, 3), **local_context)
+ self._goto_sharded_state(local_broker)
+
+ remote_context = self._setup_replication_test(1)
+ self._merge_object(index=4, **remote_context)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ local_broker.get_shard_ranges(include_own=True),
+ expect_success=True)
+
+ # sharded broker takes object count from shard range whereas remote
+ # unsharded broker takes it from object table
+ self.assert_info_synced(
+ local_broker, 1,
+ mismatches=['db_state', 'object_count', 'bytes_used',
+ 'status_changed_at', 'hash'])
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ self.assertEqual([remote_broker._db_file],
+ get_db_files(remote_broker.db_file))
+ self.assertEqual(remote_context['objects'][4:5],
+ remote_broker.get_objects())
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ local_broker.get_shard_ranges(include_own=True),
+ expect_success=True)
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ self.assertEqual([remote_broker._db_file],
+ get_db_files(remote_broker.db_file))
+ self.assertEqual(remote_context['objects'][4:5],
+ remote_broker.get_objects())
+
+ def test_replication_local_sharded_remote_unsharded(self):
+ self._check_replication_local_sharded_remote_unsharded({})
+
+ def test_replication_local_sharded_remote_unsharded_large_diff(self):
+ self._check_replication_local_sharded_remote_unsharded({'per_diff': 1})
+
+ def _check_replication_local_sharded_remote_sharding(self, repl_conf):
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch=epoch)
+ local_context['shard_ranges'][0].object_count = 99
+ local_context['shard_ranges'][0].bytes_used = 999
+ local_context['shard_ranges'][0].state = ShardRange.ACTIVE
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(0, 5), **local_context)
+ self._goto_sharded_state(local_broker)
+
+ remote_context = self._setup_replication_test(1)
+ self._merge_object(index=6, **remote_context)
+ remote_broker = remote_context['broker']
+ remote_info_orig = remote_broker.get_info()
+ self._goto_sharding_state(remote_broker, epoch=epoch)
+ self._merge_shard_range(index=0, **remote_context)
+ self._merge_object(index=7, **remote_context)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ # remote has newer timestamp for shard range
+ remote_broker.get_shard_ranges(include_own=True),
+ expect_success=True)
+
+ # sharded broker takes object count from shard range whereas remote
+ # sharding broker takes it from object table
+ self.assert_info_synced(
+ local_broker, 1,
+ mismatches=['db_state', 'object_count', 'bytes_used',
+ 'status_changed_at', 'hash'])
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ shard_db = make_db_file_path(remote_broker._db_file, epoch)
+ self.assertEqual([remote_broker._db_file, shard_db],
+ get_db_files(remote_broker.db_file))
+ # remote fresh db objects are unchanged
+ self.assertEqual(remote_context['objects'][7:8],
+ remote_broker.get_objects())
+ # remote old hash.db objects are unchanged
+ remote_old_broker = self.backend(
+ remote_broker._db_file, account=remote_broker.account,
+ container=remote_broker.container, force_db_file=True)
+ self.assertEqual(
+ remote_context['objects'][6:7],
+ remote_old_broker.get_objects())
+ remote_info = remote_old_broker.get_info()
+ remote_info_orig.pop('db_state')
+ remote_info.pop('db_state')
+ self.assertEqual(remote_info_orig, remote_info)
+ self.assertEqual(local_broker.get_shard_ranges(),
+ remote_broker.get_shard_ranges())
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ remote_broker.get_shard_ranges(include_own=True),
+ expect_success=True)
+
+ def test_replication_local_sharded_remote_sharding(self):
+ self._check_replication_local_sharded_remote_sharding({})
+
+ def test_replication_local_sharded_remote_sharding_large_diff(self):
+ self._check_replication_local_sharded_remote_sharding({'per_diff': 1})
+
+ def _check_replication_local_sharded_remote_sharded(self, repl_conf):
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ epoch = Timestamp.now()
+ self._goto_sharding_state(local_broker, epoch)
+ local_context['shard_ranges'][0].object_count = 99
+ local_context['shard_ranges'][0].bytes_used = 999
+ local_context['shard_ranges'][0].state = ShardRange.ACTIVE
+ self._merge_shard_range(index=0, **local_context)
+ self._merge_object(index=slice(0, 6), **local_context)
+ self._goto_sharded_state(local_broker)
+
+ remote_context = self._setup_replication_test(1)
+ self._merge_object(index=6, **remote_context)
+ remote_broker = remote_context['broker']
+ self._goto_sharding_state(remote_broker, epoch)
+ remote_context['shard_ranges'][0].object_count = 101
+ remote_context['shard_ranges'][0].bytes_used = 1010
+ remote_context['shard_ranges'][0].state = ShardRange.ACTIVE
+ self._merge_shard_range(index=0, **remote_context)
+ self._merge_object(index=7, **remote_context)
+ self._goto_sharded_state(remote_broker)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ # remote has newer timestamp for shard range
+ remote_broker.get_shard_ranges(include_own=True),
+ expect_success=True)
+
+ self.assert_info_synced(
+ local_broker, 1,
+ mismatches=['status_changed_at', 'hash'])
+
+ remote_broker = self._get_broker('a', 'c', node_index=1)
+ shard_db = make_db_file_path(remote_broker._db_file, epoch)
+ self.assertEqual([shard_db],
+ get_db_files(remote_broker.db_file))
+ self.assertEqual(remote_context['objects'][7:8],
+ remote_broker.get_objects())
+ # remote shard range was newer than local so object count is not
+ # updated by sync'd shard range
+ self.assertEqual(
+ 101, remote_broker.get_shard_ranges()[0].object_count)
+
+ self._check_only_shard_ranges_replicated(
+ local_broker, 1, repl_conf,
+ # remote has newer timestamp for shard range
+ remote_broker.get_shard_ranges(include_own=True),
+ expect_success=True)
+
+ def test_replication_local_sharded_remote_sharded(self):
+ self._check_replication_local_sharded_remote_sharded({})
+
+ def test_replication_local_sharded_remote_sharded_large_diff(self):
+ self._check_replication_local_sharded_remote_sharded({'per_diff': 1})
+
+ def test_replication_rsync_then_merge_aborts_before_merge_sharding(self):
+ # verify that rsync_then_merge aborts if remote starts sharding during
+ # the rsync
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ self._merge_object(index=slice(0, 3), **local_context)
+ remote_context = self._setup_replication_test(1)
+ remote_broker = remote_context['broker']
+ remote_broker.logger = debug_logger()
+ self._merge_object(index=5, **remote_context)
+
+ orig_func = replicator.ContainerReplicatorRpc.rsync_then_merge
+
+ def mock_rsync_then_merge(*args):
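+ # push the remote broker into sharding state before the original
+ # rsync_then_merge handler runs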
+ remote_broker.merge_shard_ranges(
+ ShardRange('.shards_a/cc', Timestamp.now()))
+ self._goto_sharding_state(remote_broker, Timestamp.now())
+ return orig_func(*args)
+
+ with mock.patch(
+ 'swift.container.replicator.ContainerReplicatorRpc.'
+ 'rsync_then_merge',
+ mock_rsync_then_merge):
+ with mock.patch(
+ 'swift.container.backend.ContainerBroker.'
+ 'get_items_since') as mock_get_items_since:
+ daemon, repl_calls, rsync_calls = self.check_replicate(
+ local_broker, 1, expect_success=False,
+ repl_conf={'per_diff': 1})
+
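+ # the merge should have been aborted before any items were fetched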
+ mock_get_items_since.assert_not_called()
+ self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(local_broker.get_info()['id'],
+ os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+
+ def test_replication_rsync_then_merge_aborts_before_merge_sharded(self):
+ # verify that rsync_then_merge aborts if remote completes sharding
+ # during the rsync
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ self._merge_object(index=slice(0, 3), **local_context)
+ remote_context = self._setup_replication_test(1)
+ remote_broker = remote_context['broker']
+ remote_broker.logger = debug_logger()
+ self._merge_object(index=5, **remote_context)
+
+ orig_func = replicator.ContainerReplicatorRpc.rsync_then_merge
+
+ def mock_rsync_then_merge(*args):
+ remote_broker.merge_shard_ranges(
+ ShardRange('.shards_a/cc', Timestamp.now()))
+ self._goto_sharding_state(remote_broker, Timestamp.now())
+ self._goto_sharded_state(remote_broker)
+ return orig_func(*args)
+
+ with mock.patch(
+ 'swift.container.replicator.ContainerReplicatorRpc.'
+ 'rsync_then_merge',
+ mock_rsync_then_merge):
+ with mock.patch(
+ 'swift.container.backend.ContainerBroker.'
+ 'get_items_since') as mock_get_items_since:
+ daemon, repl_calls, rsync_calls = self.check_replicate(
+ local_broker, 1, expect_success=False,
+ repl_conf={'per_diff': 1})
+
+ mock_get_items_since.assert_not_called()
+ self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(local_broker.get_info()['id'],
+ os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+
+ def test_replication_rsync_then_merge_aborts_after_merge_sharding(self):
+ # verify that rsync_then_merge aborts if remote starts sharding during
+ # the merge
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ self._merge_object(index=slice(0, 3), **local_context)
+ remote_context = self._setup_replication_test(1)
+ remote_broker = remote_context['broker']
+ remote_broker.logger = debug_logger()
+ self._merge_object(index=5, **remote_context)
+
+ orig_get_items_since = backend.ContainerBroker.get_items_since
+ calls = []
+
+ def fake_get_items_since(broker, *args):
+ # remote starts sharding while rpc call is merging
+ if not calls:
+ remote_broker.merge_shard_ranges(
+ ShardRange('.shards_a/cc', Timestamp.now()))
+ self._goto_sharding_state(remote_broker, Timestamp.now())
+ calls.append(args)
+ return orig_get_items_since(broker, *args)
+
+ with mock.patch(
+ 'swift.container.backend.ContainerBroker.get_items_since',
+ fake_get_items_since):
+ daemon, repl_calls, rsync_calls = self.check_replicate(
+ local_broker, 1, expect_success=False,
+ repl_conf={'per_diff': 1})
+
+ self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(local_broker.get_info()['id'],
+ os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+
+ def test_replication_rsync_then_merge_aborts_after_merge_sharded(self):
+ # verify that rsync_then_merge aborts if remote completes sharding
+ # during the merge
+ local_context = self._setup_replication_test(0)
+ local_broker = local_context['broker']
+ self._merge_object(index=slice(0, 3), **local_context)
+ remote_context = self._setup_replication_test(1)
+ remote_broker = remote_context['broker']
+ remote_broker.logger = debug_logger()
+ self._merge_object(index=5, **remote_context)
+
+ orig_get_items_since = backend.ContainerBroker.get_items_since
+ calls = []
+
+ def fake_get_items_since(broker, *args):
+ # remote completes sharding while rpc call is merging
+ result = orig_get_items_since(broker, *args)
+ if calls:
+ remote_broker.merge_shard_ranges(
+ ShardRange('.shards_a/cc', Timestamp.now()))
+ self._goto_sharding_state(remote_broker, Timestamp.now())
+ self._goto_sharded_state(remote_broker)
+ calls.append(args)
+ return result
+
+ with mock.patch(
+ 'swift.container.backend.ContainerBroker.get_items_since',
+ fake_get_items_since):
+ daemon, repl_calls, rsync_calls = self.check_replicate(
+ local_broker, 1, expect_success=False,
+ repl_conf={'per_diff': 1})
+
+ self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
+ [call[0] for call in repl_calls])
+ self.assertEqual(local_broker.db_file, rsync_calls[0][0])
+ self.assertEqual(local_broker.get_info()['id'],
+ os.path.basename(rsync_calls[0][1]))
+ self.assertFalse(rsync_calls[1:])
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/unit/container/test_server.py b/test/unit/container/test_server.py
index 54ce6d973b..916f0e146d 100644
--- a/test/unit/container/test_server.py
+++ b/test/unit/container/test_server.py
@@ -22,6 +22,7 @@ import itertools
from contextlib import contextmanager
from shutil import rmtree
from tempfile import mkdtemp
+from test.unit import make_timestamp_iter, mock_timestamp_now
from time import gmtime
from xml.dom import minidom
import time
@@ -40,12 +41,13 @@ import swift.container
from swift.container import server as container_server
from swift.common import constraints
from swift.common.utils import (Timestamp, mkdirs, public, replication,
- storage_directory, lock_parent_directory)
+ storage_directory, lock_parent_directory,
+ ShardRange)
from test.unit import fake_http_connect, debug_logger, mock_check_drive
from swift.common.storage_policy import (POLICIES, StoragePolicy)
from swift.common.request_helpers import get_sys_meta_prefix
-from test import listen_zero
+from test import listen_zero, annotate_failure
from test.unit import patch_policies
@@ -86,6 +88,16 @@ class TestContainerController(unittest.TestCase):
"""
pass
+ def _put_shard_range(self, shard_range):
+ put_timestamp = shard_range.timestamp.internal
+ headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Timestamp': put_timestamp}
+ body = json.dumps([dict(shard_range)])
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers,
+ body=body)
+ resp = req.get_response(self.controller)
+ self.assertIn(resp.status_int, (201, 202))
+
def _check_put_container_storage_policy(self, req, policy_index):
resp = req.get_response(self.controller)
self.assertEqual(201, resp.status_int)
@@ -95,6 +107,11 @@ class TestContainerController(unittest.TestCase):
self.assertEqual(str(policy_index),
resp.headers['X-Backend-Storage-Policy-Index'])
+ def _assert_shard_ranges_equal(self, x, y):
+ # ShardRange.__eq__ only compares lower and upper; here we generate
+ # dict representations to compare all attributes
+ self.assertEqual([dict(sr) for sr in x], [dict(sr) for sr in y])
+
def test_creation(self):
# later config should be extended to assert more config options
replicator = container_server.ContainerController(
@@ -424,7 +441,7 @@ class TestContainerController(unittest.TestCase):
elif state[0] == 'race':
# Save the original db_file attribute value
self._saved_db_file = self.db_file
- self.db_file += '.doesnotexist'
+ self._db_file += '.doesnotexist'
def initialize(self, *args, **kwargs):
if state[0] == 'initial':
@@ -433,7 +450,7 @@ class TestContainerController(unittest.TestCase):
elif state[0] == 'race':
# Restore the original db_file attribute to get the race
# behavior
- self.db_file = self._saved_db_file
+ self._db_file = self._saved_db_file
return super(InterceptedCoBr, self).initialize(*args, **kwargs)
with mock.patch("swift.container.server.ContainerBroker",
@@ -1372,21 +1389,100 @@ class TestContainerController(unittest.TestCase):
self.assertEqual(resp.status_int, 500)
def test_DELETE(self):
+ ts_iter = make_timestamp_iter()
req = Request.blank(
'/sda1/p/a/c',
- environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '1'})
+ environ={'REQUEST_METHOD': 'PUT'},
+ headers={'X-Timestamp': next(ts_iter).internal})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
+
+ # PUT an *empty* shard range
+ sr = ShardRange('.shards_a/c', next(ts_iter), 'l', 'u', 0, 0,
+ state=ShardRange.ACTIVE)
req = Request.blank(
'/sda1/p/a/c',
- environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '2'})
+ environ={'REQUEST_METHOD': 'PUT'},
+ headers={'X-Timestamp': next(ts_iter).internal,
+ 'X-Backend-Record-Type': 'shard'},
+ body=json.dumps([dict(sr)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 202)
+
+ req = Request.blank(
+ '/sda1/p/a/c',
+ environ={'REQUEST_METHOD': 'DELETE'},
+ headers={'X-Timestamp': next(ts_iter).internal})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 204)
+
req = Request.blank(
'/sda1/p/a/c',
- environ={'REQUEST_METHOD': 'GET'}, headers={'X-Timestamp': '3'})
+ environ={'REQUEST_METHOD': 'GET'},
+ headers={'X-Timestamp': next(ts_iter).internal})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 404)
+ req = Request.blank(
+ '/sda1/p/a/c',
+ environ={'REQUEST_METHOD': 'GET'},
+ headers={'X-Timestamp': next(ts_iter).internal,
+ 'X-Backend-Record-Type': 'shard'},
+ params={'format': 'json'})
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 404)
+
+ # the override-deleted header is ignored for object records
+ req = Request.blank(
+ '/sda1/p/a/c',
+ environ={'REQUEST_METHOD': 'GET'},
+ headers={'X-Timestamp': next(ts_iter).internal,
+ 'X-Backend-Override-Deleted': 'true'},
+ params={'format': 'json'})
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 404)
+
+ # but override-deleted header makes shard ranges available after DELETE
+ req = Request.blank(
+ '/sda1/p/a/c',
+ environ={'REQUEST_METHOD': 'GET'},
+ headers={'X-Timestamp': next(ts_iter).internal,
+ 'X-Backend-Record-Type': 'shard',
+ 'X-Backend-Override-Deleted': 'true'},
+ params={'format': 'json'})
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual([dict(sr, last_modified=sr.timestamp.isoformat)],
+ json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual('shard', resp.headers['X-Backend-Record-Type'])
+
+ # ... unless the override header equates to False
+ req = Request.blank(
+ '/sda1/p/a/c',
+ environ={'REQUEST_METHOD': 'GET'},
+ headers={'X-Timestamp': next(ts_iter).internal,
+ 'X-Backend-Record-Type': 'shard',
+ 'X-Backend-Override-Deleted': 'no'},
+ params={'format': 'json'})
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 404)
+ self.assertNotIn('X-Backend-Record-Type', resp.headers)
+
+ # ...or the db file is unlinked
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ self.assertTrue(os.path.exists(broker.db_file))
+ os.unlink(broker.db_file)
+ self.assertFalse(os.path.exists(broker.db_file))
+ req = Request.blank(
+ '/sda1/p/a/c',
+ environ={'REQUEST_METHOD': 'GET'},
+ headers={'X-Timestamp': next(ts_iter).internal,
+ 'X-Backend-Record-Type': 'shard',
+ 'X-Backend-Override-Deleted': 'true'},
+ params={'format': 'json'})
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 404)
+ self.assertNotIn('X-Backend-Record-Type', resp.headers)
def test_DELETE_PUT_recreate(self):
path = '/sda1/p/a/c'
@@ -1460,7 +1556,7 @@ class TestContainerController(unittest.TestCase):
self.assertEqual(True, db.is_deleted())
# now save a copy of this db (and remove it from the "current node")
db = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
- db_path = db.db_file
+ db_path = db._db_file
other_path = os.path.join(self.testdir, 'othernode.db')
os.rename(db_path, other_path)
# that should make it missing on this node
@@ -1474,6 +1570,8 @@ class TestContainerController(unittest.TestCase):
def mock_exists(db_path):
rv = _real_exists(db_path)
+ if db_path != db._db_file:
+ return rv
if not mock_called:
# be as careful as we might hope backend replication can be...
with lock_parent_directory(db_path, timeout=1):
@@ -2040,6 +2138,1140 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 412)
+ def test_PUT_shard_range_autocreates_shard_container(self):
+ ts_iter = make_timestamp_iter()
+ shard_range = ShardRange('.shards_a/shard_c', next(ts_iter))
+ put_timestamp = next(ts_iter).internal
+ headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Timestamp': put_timestamp,
+ 'X-Container-Sysmeta-Test': 'set',
+ 'X-Container-Meta-Test': 'persisted'}
+
+ # PUT shard range to non-existent container with non-autocreate prefix
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers,
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(404, resp.status_int)
+
+ # PUT shard range to non-existent container with autocreate prefix,
+ # missing storage policy
+ headers['X-Timestamp'] = next(ts_iter).internal
+ req = Request.blank(
+ '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers,
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(400, resp.status_int)
+ self.assertIn('X-Backend-Storage-Policy-Index header is required',
+ resp.body)
+
+ # PUT shard range to non-existent container with autocreate prefix
+ headers['X-Timestamp'] = next(ts_iter).internal
+ policy_index = random.choice(POLICIES).idx
+ headers['X-Backend-Storage-Policy-Index'] = str(policy_index)
+ req = Request.blank(
+ '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers,
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(201, resp.status_int)
+
+ # repeat PUT of shard range to autocreated container - 202 response
+ headers['X-Timestamp'] = next(ts_iter).internal
+ headers.pop('X-Backend-Storage-Policy-Index') # no longer required
+ req = Request.blank(
+ '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers,
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+
+ # regular PUT to autocreated container - 202 response
+ headers['X-Timestamp'] = next(ts_iter).internal
+ req = Request.blank(
+ '/sda1/p/.shards_a/shard_c', method='PUT',
+ headers={'X-Timestamp': next(ts_iter).internal},
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+
+ def test_PUT_shard_range_to_deleted_container(self):
+ ts_iter = make_timestamp_iter()
+ put_time = next(ts_iter).internal
+ # create a container, get it to sharded state and then delete it
+ req = Request.blank('/sda1/p/a/c', method='PUT',
+ headers={'X-Timestamp': put_time})
+ resp = req.get_response(self.controller)
+ self.assertEqual(201, resp.status_int)
+
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ self.assertTrue(broker.set_sharded_state())
+
+ delete_time = next(ts_iter).internal
+ req = Request.blank('/sda1/p/a/c', method='DELETE',
+ headers={'X-Timestamp': delete_time})
+ resp = req.get_response(self.controller)
+ self.assertEqual(204, resp.status_int)
+ self.assertTrue(broker.is_deleted())
+ self.assertEqual(delete_time, broker.get_info()['delete_timestamp'])
+ self.assertEqual(put_time, broker.get_info()['put_timestamp'])
+ req = Request.blank('/sda1/p/a/c', method='GET')
+ resp = req.get_response(self.controller)
+ self.assertEqual(404, resp.status_int)
+
+ # shard range PUT is accepted but container remains deleted
+ shard_range = ShardRange('.shards_a/shard_c', next(ts_iter),
+ state=ShardRange.ACTIVE)
+ headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Timestamp': next(ts_iter).internal,
+ 'X-Container-Sysmeta-Test': 'set',
+ 'X-Container-Meta-Test': 'persisted'}
+
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers,
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self.assertTrue(broker.get_info_is_deleted()[1])
+ self.assertEqual(delete_time, broker.get_info()['delete_timestamp'])
+ self.assertEqual(put_time, broker.get_info()['put_timestamp'])
+ req = Request.blank('/sda1/p/a/c', method='GET')
+ resp = req.get_response(self.controller)
+ self.assertEqual(404, resp.status_int)
+
+ # unless shard range has non-zero stats, then container is revived
+ shard_range.update_meta(99, 1234, meta_timestamp=next(ts_iter))
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers,
+ body=json.dumps([dict(shard_range)]))
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self.assertFalse(broker.get_info_is_deleted()[1])
+ self.assertEqual(delete_time, broker.get_info()['delete_timestamp'])
+ self.assertEqual(put_time, broker.get_info()['put_timestamp'])
+ req = Request.blank('/sda1/p/a/c', method='GET')
+ resp = req.get_response(self.controller)
+ self.assertEqual(204, resp.status_int)
+ self.assertEqual('99', resp.headers['X-Container-Object-Count'])
+
+ def test_PUT_shard_range_json_in_body(self):
+ ts_iter = make_timestamp_iter()
+ oldest_ts = next(ts_iter) # used for stale shard range PUT later
+ shard_bounds = [('', 'ham', ShardRange.ACTIVE),
+ ('ham', 'salami', ShardRange.ACTIVE),
+ ('salami', '', ShardRange.CREATED)]
+ shard_ranges = [
+ ShardRange('.shards_a/_%s' % upper, next(ts_iter),
+ lower, upper,
+ i * 100, i * 1000, meta_timestamp=next(ts_iter),
+ state=state, state_timestamp=next(ts_iter))
+ for i, (lower, upper, state) in enumerate(shard_bounds)]
+
+ put_timestamp = next(ts_iter).internal
+ headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Timestamp': put_timestamp,
+ 'X-Container-Sysmeta-Test': 'set',
+ 'X-Container-Meta-Test': 'persisted'}
+ body = json.dumps([dict(sr) for sr in shard_ranges[:2]])
+
+ # PUT some shard ranges to non-existent container
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers,
+ body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(404, resp.status_int)
+
+ # create the container with a regular PUT
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT',
+ headers={'X-Timestamp': put_timestamp}, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(201, resp.status_int)
+
+ # now we can PUT shard ranges
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers,
+ body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+
+ # check broker
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ # sysmeta and user meta is updated
+ exp_meta = {'X-Container-Sysmeta-Test': 'set',
+ 'X-Container-Meta-Test': 'persisted'}
+ self.assertEqual(
+ exp_meta, dict((k, v[0]) for k, v in broker.metadata.items()))
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+ self._assert_shard_ranges_equal(shard_ranges[:2],
+ broker.get_shard_ranges())
+
+ # empty json dict
+ body = json.dumps({})
+ headers['X-Timestamp'] = next(ts_iter).internal
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self.assertEqual(
+ exp_meta, dict((k, v[0]) for k, v in broker.metadata.items()))
+ self._assert_shard_ranges_equal(shard_ranges[:2],
+ broker.get_shard_ranges())
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+
+ older_ts = next(ts_iter) # used for stale shard range PUT later
+ # updated and new shard ranges
+ shard_ranges[1].bytes_used += 100
+ shard_ranges[1].meta_timestamp = next(ts_iter)
+ body = json.dumps([dict(sr) for sr in shard_ranges[1:]])
+ headers['X-Timestamp'] = next(ts_iter).internal
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self.assertEqual(
+ exp_meta, dict((k, v[0]) for k, v in broker.metadata.items()))
+ self._assert_shard_ranges_equal(shard_ranges,
+ broker.get_shard_ranges())
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+
+ # stale shard range
+ stale_shard_range = shard_ranges[1].copy()
+ stale_shard_range.bytes_used = 0
+ stale_shard_range.object_count = 0
+ stale_shard_range.meta_timestamp = older_ts
+ stale_shard_range.state = ShardRange.CREATED
+ stale_shard_range.state_timestamp = oldest_ts
+ body = json.dumps([dict(stale_shard_range)])
+ headers['X-Timestamp'] = next(ts_iter).internal
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self.assertEqual(
+ exp_meta, dict((k, v[0]) for k, v in broker.metadata.items()))
+ self._assert_shard_ranges_equal(shard_ranges,
+ broker.get_shard_ranges())
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+
+ # deleted shard range
+ shard_ranges[0].deleted = 1
+ shard_ranges[0].timestamp = next(ts_iter)
+ body = json.dumps([dict(shard_ranges[0])])
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self.assertEqual(
+ exp_meta, dict((k, v[0]) for k, v in broker.metadata.items()))
+ self._assert_shard_ranges_equal(
+ shard_ranges, broker.get_shard_ranges(include_deleted=True))
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+
+ def check_bad_body(body):
+ bad_put_timestamp = next(ts_iter).internal
+ headers['X-Timestamp'] = bad_put_timestamp
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(400, resp.status_int)
+ self.assertIn('Invalid body', resp.body)
+ self.assertEqual(
+ exp_meta, dict((k, v[0]) for k, v in broker.metadata.items()))
+ self._assert_shard_ranges_equal(
+ shard_ranges, broker.get_shard_ranges(include_deleted=True))
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+
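+ # invalid bodies are rejected with 400 and leave the broker unchanged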
+ check_bad_body('not json')
+ check_bad_body('')
+ check_bad_body('["not a shard range"]')
+ check_bad_body('[[]]')
+ bad_shard_range = dict(ShardRange('a/c', next(ts_iter)))
+ bad_shard_range.pop('timestamp')
+ check_bad_body(json.dumps([bad_shard_range]))
+
+ def check_not_shard_record_type(headers):
+ # body ignored
+ body = json.dumps([dict(sr) for sr in shard_ranges])
+ # note, regular PUT so put timestamp is updated
+ put_timestamp = next(ts_iter).internal
+ headers['X-Timestamp'] = put_timestamp
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ resp = req.get_response(self.controller)
+ self.assertEqual(202, resp.status_int)
+ self._assert_shard_ranges_equal(
+ shard_ranges, broker.get_shard_ranges(include_deleted=True))
+ self.assertEqual(put_timestamp, broker.get_info()['put_timestamp'])
+
+ check_not_shard_record_type({'X-Backend-Record-Type': 'object',
+ 'X-Timestamp': next(ts_iter).internal})
+
+ check_not_shard_record_type({'X-Timestamp': next(ts_iter).internal})
+
+ def test_PUT_GET_shard_ranges(self):
+ # make a container
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now() # used when mocking Timestamp.now()
+ headers = {'X-Timestamp': next(ts_iter).normal}
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers)
+ self.assertEqual(201, req.get_response(self.controller).status_int)
+ # PUT some objects
+ objects = [{'name': 'obj_%d' % i,
+ 'x-timestamp': next(ts_iter).normal,
+ 'x-content-type': 'text/plain',
+ 'x-etag': 'etag_%d' % i,
+ 'x-size': 1024 * i
+ } for i in range(2)]
+ for obj in objects:
+ req = Request.blank('/sda1/p/a/c/%s' % obj['name'], method='PUT',
+ headers=obj)
+ self._update_object_put_headers(req)
+ resp = req.get_response(self.controller)
+ self.assertEqual(201, resp.status_int)
+ # PUT some shard ranges
+ shard_bounds = [('', 'apple', ShardRange.SHRINKING),
+ ('apple', 'ham', ShardRange.CLEAVED),
+ ('ham', 'salami', ShardRange.ACTIVE),
+ ('salami', 'yoghurt', ShardRange.CREATED),
+ ('yoghurt', '', ShardRange.FOUND),
+ ]
+ shard_ranges = [
+ ShardRange('.sharded_a/_%s' % upper, next(ts_iter),
+ lower, upper,
+ i * 100, i * 1000, meta_timestamp=next(ts_iter),
+ state=state, state_timestamp=next(ts_iter))
+ for i, (lower, upper, state) in enumerate(shard_bounds)]
+ for shard_range in shard_ranges:
+ self._put_shard_range(shard_range)
+
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ self.assertTrue(broker.is_root_container()) # sanity
+ self._assert_shard_ranges_equal(shard_ranges,
+ broker.get_shard_ranges())
+
+ # sanity check - no shard ranges when GET is only for objects
+ def check_object_GET(path):
+ req = Request.blank(path, method='GET')
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual(resp.content_type, 'application/json')
+ expected = [
+ dict(hash=obj['x-etag'], bytes=obj['x-size'],
+ content_type=obj['x-content-type'],
+ last_modified=Timestamp(obj['x-timestamp']).isoformat,
+ name=obj['name']) for obj in objects]
+ self.assertEqual(expected, json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual('object', resp.headers['X-Backend-Record-Type'])
+
+ check_object_GET('/sda1/p/a/c?format=json')
+
+ # GET only shard ranges
+ def check_shard_GET(expected_shard_ranges, path, params=''):
+ req = Request.blank('/sda1/p/%s?format=json%s' %
+ (path, params), method='GET',
+ headers={'X-Backend-Record-Type': 'shard'})
+ with mock_timestamp_now(ts_now):
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual(resp.content_type, 'application/json')
+ expected = [
+ dict(sr, last_modified=Timestamp(sr.timestamp).isoformat)
+ for sr in expected_shard_ranges]
+ self.assertEqual(expected, json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual('shard', resp.headers['X-Backend-Record-Type'])
+
+ # all shards
+ check_shard_GET(shard_ranges, 'a/c')
+ check_shard_GET(reversed(shard_ranges), 'a/c', params='&reverse=true')
+ # only created shards
+ check_shard_GET(shard_ranges[3:4], 'a/c', params='&states=created')
+ # only found shards
+ check_shard_GET(shard_ranges[4:5], 'a/c', params='&states=found')
+ # only cleaved shards
+ check_shard_GET(shard_ranges[1:2], 'a/c',
+ params='&states=cleaved')
+ # only active shards
+ check_shard_GET(shard_ranges[2:3], 'a/c',
+ params='&states=active&end_marker=pickle')
+ # only cleaved or active shards, reversed
+ check_shard_GET(
+ reversed(shard_ranges[1:3]), 'a/c',
+ params='&states=cleaved,active&reverse=true&marker=pickle')
+ # only shrinking shards
+ check_shard_GET(shard_ranges[:1], 'a/c',
+ params='&states=shrinking&end_marker=pickle')
+ check_shard_GET(shard_ranges[:1], 'a/c',
+ params='&states=shrinking&reverse=true&marker=pickle')
+ # only active or shrinking shards
+ check_shard_GET([shard_ranges[0], shard_ranges[2]], 'a/c',
+ params='&states=shrinking,active&end_marker=pickle')
+ check_shard_GET(
+ [shard_ranges[2], shard_ranges[0]], 'a/c',
+ params='&states=active,shrinking&reverse=true&marker=pickle')
+ # only active or shrinking shards using listing alias
+ check_shard_GET(shard_ranges[:3], 'a/c',
+ params='&states=listing&end_marker=pickle')
+ check_shard_GET(
+ reversed(shard_ranges[:3]), 'a/c',
+ params='&states=listing&reverse=true&marker=pickle')
+ # only created, cleaved, active, shrinking shards using updating alias
+ check_shard_GET(shard_ranges[1:4], 'a/c',
+ params='&states=updating&end_marker=treacle')
+ check_shard_GET(
+ reversed(shard_ranges[1:4]), 'a/c',
+ params='&states=updating&reverse=true&marker=treacle')
+
+ # the shard ranges in listing states don't cover the entire namespace so
+ # expect an extra filler
+ extra_shard_range = ShardRange(
+ 'a/c', ts_now, shard_ranges[2].upper, ShardRange.MAX, 2, 1024,
+ state=ShardRange.ACTIVE)
+ expected = shard_ranges[:3] + [extra_shard_range]
+ check_shard_GET(expected, 'a/c', params='&states=listing')
+ check_shard_GET(reversed(expected), 'a/c',
+ params='&states=listing&reverse=true')
+ expected = [shard_ranges[2], extra_shard_range]
+ check_shard_GET(expected, 'a/c',
+ params='&states=listing&marker=pickle')
+ check_shard_GET(
+ reversed(expected), 'a/c',
+ params='&states=listing&reverse=true&end_marker=pickle')
+ # the shard ranges in updating states don't cover the entire namespace so
+ # expect a filler
+ extra_shard_range = ShardRange(
+ 'a/c', ts_now, shard_ranges[3].upper, ShardRange.MAX, 2, 1024,
+ state=ShardRange.ACTIVE)
+ expected = shard_ranges[1:4] + [extra_shard_range]
+ check_shard_GET(expected, 'a/c', params='&states=updating')
+ check_shard_GET(reversed(expected), 'a/c',
+ params='&states=updating&reverse=true')
+ # when no listing shard ranges cover the requested namespace range then
+ # filler is for entire requested namespace
+ extra_shard_range = ShardRange(
+ 'a/c', ts_now, 'treacle', ShardRange.MAX, 2, 1024,
+ state=ShardRange.ACTIVE)
+ check_shard_GET([extra_shard_range], 'a/c',
+ params='&states=listing&marker=treacle')
+ check_shard_GET(
+ [extra_shard_range], 'a/c',
+ params='&states=listing&reverse=true&end_marker=treacle')
+ extra_shard_range = ShardRange(
+ 'a/c', ts_now, 'treacle', 'walnut', 2, 1024,
+ state=ShardRange.ACTIVE)
+ params = '&states=listing&marker=treacle&end_marker=walnut'
+ check_shard_GET([extra_shard_range], 'a/c', params=params)
+ params = '&states=listing&reverse=true&marker=walnut' + \
+ '&end_marker=treacle'
+ check_shard_GET([extra_shard_range], 'a/c', params=params)
+ # specific object
+ check_shard_GET(shard_ranges[1:2], 'a/c', params='&includes=cheese')
+ check_shard_GET(shard_ranges[1:2], 'a/c', params='&includes=ham')
+ check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=pickle')
+ check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=salami')
+ check_shard_GET(shard_ranges[3:4], 'a/c', params='&includes=walnut')
+ check_shard_GET(shard_ranges[3:4], 'a/c',
+ params='&includes=walnut&reverse=true')
+ # with marker
+ check_shard_GET(shard_ranges[1:], 'a/c', params='&marker=cheese')
+ check_shard_GET(reversed(shard_ranges[:2]), 'a/c',
+ params='&marker=cheese&reverse=true')
+ check_shard_GET(shard_ranges[2:], 'a/c', params='&marker=ham')
+ check_shard_GET(reversed(shard_ranges[:2]), 'a/c',
+ params='&marker=ham&reverse=true')
+ check_shard_GET(shard_ranges[2:], 'a/c', params='&marker=pickle')
+ check_shard_GET(reversed(shard_ranges[:3]), 'a/c',
+ params='&marker=pickle&reverse=true')
+ check_shard_GET(shard_ranges[3:], 'a/c', params='&marker=salami')
+ check_shard_GET(reversed(shard_ranges[:3]), 'a/c',
+ params='&marker=salami&reverse=true')
+ check_shard_GET(shard_ranges[3:], 'a/c', params='&marker=walnut')
+ check_shard_GET(reversed(shard_ranges[:4]), 'a/c',
+ params='&marker=walnut&reverse=true')
+ # with end marker
+ check_shard_GET(shard_ranges[:2], 'a/c', params='&end_marker=cheese')
+ check_shard_GET(reversed(shard_ranges[1:]), 'a/c',
+ params='&end_marker=cheese&reverse=true')
+ # with end_marker of 'ham' the forward listing still includes the range
+ # 'apple' - 'ham' because its lower bound is less than the end_marker, but
+ # the reverse listing excludes it because everything in that range is
+ # <= 'ham' and end_marker is non-inclusive
+ check_shard_GET(shard_ranges[:2], 'a/c', params='&end_marker=ham')
+ check_shard_GET(reversed(shard_ranges[2:]), 'a/c',
+ params='&end_marker=ham&reverse=true')
+ check_shard_GET(shard_ranges[:3], 'a/c', params='&end_marker=pickle')
+ check_shard_GET(reversed(shard_ranges[2:]), 'a/c',
+ params='&end_marker=pickle&reverse=true')
+ check_shard_GET(shard_ranges[:3], 'a/c', params='&end_marker=salami')
+ check_shard_GET(reversed(shard_ranges[3:]), 'a/c',
+ params='&end_marker=salami&reverse=true')
+ check_shard_GET(shard_ranges[:4], 'a/c', params='&end_marker=walnut')
+ check_shard_GET(reversed(shard_ranges[3:]), 'a/c',
+ params='&end_marker=walnut&reverse=true')
+ # with marker and end marker
+ check_shard_GET(shard_ranges[1:2], 'a/c',
+ params='&marker=cheese&end_marker=egg')
+ check_shard_GET(shard_ranges[1:2], 'a/c',
+ params='&end_marker=cheese&marker=egg&reverse=true')
+ check_shard_GET(shard_ranges[1:3], 'a/c',
+ params='&marker=egg&end_marker=jam')
+ check_shard_GET(reversed(shard_ranges[1:3]), 'a/c',
+ params='&end_marker=egg&marker=jam&reverse=true')
+ check_shard_GET(shard_ranges[1:4], 'a/c',
+ params='&marker=cheese&end_marker=walnut')
+ check_shard_GET(reversed(shard_ranges[1:4]), 'a/c',
+ params='&end_marker=cheese&marker=walnut&reverse=true')
+ check_shard_GET(shard_ranges[2:4], 'a/c',
+ params='&marker=jam&end_marker=walnut')
+ check_shard_GET(reversed(shard_ranges[2:4]), 'a/c',
+ params='&end_marker=jam&marker=walnut&reverse=true')
+ check_shard_GET(shard_ranges[3:4], 'a/c',
+ params='&marker=toast&end_marker=walnut')
+ check_shard_GET(shard_ranges[3:4], 'a/c',
+ params='&end_marker=toast&marker=walnut&reverse=true')
+ check_shard_GET([], 'a/c',
+ params='&marker=egg&end_marker=cheese')
+ check_shard_GET([], 'a/c',
+ params='&marker=cheese&end_marker=egg&reverse=true')
+
+ # delete a shard range
+ shard_range = shard_ranges[1]
+ shard_range.set_deleted(timestamp=next(ts_iter))
+ self._put_shard_range(shard_range)
+
+ self._assert_shard_ranges_equal(shard_ranges[:1] + shard_ranges[2:],
+ broker.get_shard_ranges())
+
+ check_shard_GET(shard_ranges[:1] + shard_ranges[2:], 'a/c')
+ check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=jam')
+ # specify obj, marker or end_marker not in any shard range
+ check_shard_GET([], 'a/c', params='&includes=cheese')
+ check_shard_GET([], 'a/c', params='&includes=cheese&reverse=true')
+ check_shard_GET([], 'a/c', params='&includes=ham')
+ check_shard_GET(shard_ranges[2:], 'a/c/', params='&marker=cheese')
+ check_shard_GET(shard_ranges[:1], 'a/c/',
+ params='&marker=cheese&reverse=true')
+ check_shard_GET(shard_ranges[:1], 'a/c/', params='&end_marker=cheese')
+ check_shard_GET(reversed(shard_ranges[2:]), 'a/c/',
+ params='&end_marker=cheese&reverse=true')
+
+ self.assertFalse(self.controller.logger.get_lines_for_level('warning'))
+ self.assertFalse(self.controller.logger.get_lines_for_level('error'))
+
+ def test_GET_shard_ranges_using_state_aliases(self):
+ # make a shard container
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now() # used when mocking Timestamp.now()
+ shard_ranges = []
+ lower = ''
+ for state in sorted(ShardRange.STATES.keys()):
+ upper = str(state)
+ shard_ranges.append(
+ ShardRange('.shards_a/c_%s' % upper, next(ts_iter),
+ lower, upper, state * 100, state * 1000,
+ meta_timestamp=next(ts_iter),
+ state=state, state_timestamp=next(ts_iter)))
+ lower = upper
+
+ def do_test(root_path, path, params, expected_states):
+ expected = [
+ sr for sr in shard_ranges if sr.state in expected_states]
+ own_shard_range = ShardRange(path, next(ts_iter), '', '',
+ state=ShardRange.ACTIVE)
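+ # the container's own shard range is expected as a filler covering the
+ # namespace beyond the last expected shard range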
+ expected.append(own_shard_range.copy(
+ lower=expected[-1].upper, meta_timestamp=ts_now))
+ expected = [dict(sr, last_modified=sr.timestamp.isoformat)
+ for sr in expected]
+ headers = {'X-Timestamp': next(ts_iter).normal}
+
+ # create container
+ req = Request.blank(
+ '/sda1/p/%s' % path, method='PUT', headers=headers)
+ self.assertIn(
+ req.get_response(self.controller).status_int, (201, 202))
+ # PUT some shard ranges
+ headers = {'X-Timestamp': next(ts_iter).normal,
+ 'X-Container-Sysmeta-Shard-Root': root_path,
+ 'X-Backend-Record-Type': 'shard'}
+ body = json.dumps(
+ [dict(sr) for sr in shard_ranges + [own_shard_range]])
+ req = Request.blank(
+ '/sda1/p/%s' % path, method='PUT', headers=headers, body=body)
+ self.assertEqual(202, req.get_response(self.controller).status_int)
+
+ req = Request.blank('/sda1/p/%s?format=json%s' %
+ (path, params), method='GET',
+ headers={'X-Backend-Record-Type': 'shard'})
+ with mock_timestamp_now(ts_now):
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual(resp.content_type, 'application/json')
+ self.assertEqual(expected, json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual('shard', resp.headers['X-Backend-Record-Type'])
+
+ # root's shard ranges for listing
+ root_path = container_path = 'a/c'
+ params = '&states=listing'
+ expected_states = [
+ ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING,
+ ShardRange.SHRINKING]
+ do_test(root_path, container_path, params, expected_states)
+
+ # shard's shard ranges for listing
+ container_path = '.shards_a/c'
+ params = '&states=listing'
+ do_test(root_path, container_path, params, expected_states)
+
+ # root's shard ranges for updating
+ params = '&states=updating'
+ expected_states = [
+ ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE,
+ ShardRange.SHARDING]
+ container_path = root_path
+ do_test(root_path, container_path, params, expected_states)
+
+ # shard's shard ranges for updating
+ container_path = '.shards_a/c'
+ do_test(root_path, container_path, params, expected_states)
+
+ def test_GET_shard_ranges_include_deleted(self):
+ # make a shard container
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now() # used when mocking Timestamp.now()
+ shard_ranges = []
+ lower = ''
+ for state in sorted(ShardRange.STATES.keys()):
+ upper = str(state)
+ shard_ranges.append(
+ ShardRange('.shards_a/c_%s' % upper, next(ts_iter),
+ lower, upper, state * 100, state * 1000,
+ meta_timestamp=next(ts_iter),
+ state=state, state_timestamp=next(ts_iter)))
+ lower = upper
+ # create container
+ headers = {'X-Timestamp': next(ts_iter).normal}
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers)
+ self.assertIn(
+ req.get_response(self.controller).status_int, (201, 202))
+ # PUT some shard ranges
+ headers = {'X-Timestamp': next(ts_iter).normal,
+ 'X-Backend-Record-Type': 'shard'}
+ body = json.dumps([dict(sr) for sr in shard_ranges])
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ self.assertEqual(202, req.get_response(self.controller).status_int)
+
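+ # the X-Backend-Include-Deleted header controls whether deleted shard
+ # ranges are included in the listing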
+ def do_test(include_deleted, expected):
+ expected = [dict(sr, last_modified=sr.timestamp.isoformat)
+ for sr in expected]
+ headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Backend-Include-Deleted': str(include_deleted)}
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers=headers)
+ with mock_timestamp_now(ts_now):
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual(resp.content_type, 'application/json')
+ self.assertEqual(expected, json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual('shard', resp.headers['X-Backend-Record-Type'])
+
+ do_test(False, shard_ranges)
+ do_test(True, shard_ranges)
+
+ headers = {'X-Timestamp': next(ts_iter).normal,
+ 'X-Backend-Record-Type': 'shard'}
+ for sr in shard_ranges[::2]:
+ sr.set_deleted(timestamp=next(ts_iter))
+ body = json.dumps([dict(sr) for sr in shard_ranges])
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ self.assertEqual(202, req.get_response(self.controller).status_int)
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ self._assert_shard_ranges_equal(
+ shard_ranges[1::2], broker.get_shard_ranges())
+ do_test(False, shard_ranges[1::2])
+ do_test(True, shard_ranges)
+
+ headers = {'X-Timestamp': next(ts_iter).normal,
+ 'X-Backend-Record-Type': 'shard'}
+ for sr in shard_ranges[1::2]:
+ sr.set_deleted(timestamp=next(ts_iter))
+ body = json.dumps([dict(sr) for sr in shard_ranges])
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ self.assertEqual(202, req.get_response(self.controller).status_int)
+ self.assertFalse(broker.get_shard_ranges())
+ do_test(False, [])
+ do_test(True, shard_ranges)
+
+ def test_GET_shard_ranges_errors(self):
+ # verify that x-backend-record-type is not included in error responses
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now() # used when mocking Timestamp.now()
+ shard_ranges = []
+ lower = ''
+ for state in sorted(ShardRange.STATES.keys()):
+ upper = str(state)
+ shard_ranges.append(
+ ShardRange('.shards_a/c_%s' % upper, next(ts_iter),
+ lower, upper, state * 100, state * 1000,
+ meta_timestamp=next(ts_iter),
+ state=state, state_timestamp=next(ts_iter)))
+ lower = upper
+ # create container
+ headers = {'X-Timestamp': next(ts_iter).normal}
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers)
+ self.assertIn(
+ req.get_response(self.controller).status_int, (201, 202))
+ # PUT some shard ranges
+ headers = {'X-Timestamp': next(ts_iter).normal,
+ 'X-Backend-Record-Type': 'shard'}
+ body = json.dumps([dict(sr) for sr in shard_ranges])
+ req = Request.blank(
+ '/sda1/p/a/c', method='PUT', headers=headers, body=body)
+ self.assertEqual(202, req.get_response(self.controller).status_int)
+
+ def do_test(params):
+ params['format'] = 'json'
+ headers = {'X-Backend-Record-Type': 'shard'}
+ req = Request.blank('/sda1/p/a/c', method='GET',
+ headers=headers, params=params)
+ with mock_timestamp_now(ts_now):
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.content_type, 'text/html')
+ self.assertNotIn('X-Backend-Record-Type', resp.headers)
+ self.assertNotIn('X-Backend-Sharding-State', resp.headers)
+ self.assertNotIn('X-Container-Object-Count', resp.headers)
+ self.assertNotIn('X-Container-Bytes-Used', resp.headers)
+ self.assertNotIn('X-Timestamp', resp.headers)
+ self.assertNotIn('X-PUT-Timestamp', resp.headers)
+ return resp
+
+ resp = do_test({'states': 'bad'})
+ self.assertEqual(resp.status_int, 400)
+ resp = do_test({'delimiter': 'bad'})
+ self.assertEqual(resp.status_int, 412)
+ resp = do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)})
+ self.assertEqual(resp.status_int, 412)
+ with mock.patch('swift.container.server.check_drive',
+ lambda *args: False):
+ resp = do_test({})
+ self.assertEqual(resp.status_int, 507)
+
+ # delete the container
+ req = Request.blank('/sda1/p/a/c', method='DELETE',
+ headers={'X-Timestamp': next(ts_iter).normal})
+ self.assertEqual(204, req.get_response(self.controller).status_int)
+
+ resp = do_test({'states': 'bad'})
+ self.assertEqual(resp.status_int, 404)
+
+ def test_GET_auto_record_type(self):
+ # make a container
+ ts_iter = make_timestamp_iter()
+ ts_now = Timestamp.now() # used when mocking Timestamp.now()
+ headers = {'X-Timestamp': next(ts_iter).normal}
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers)
+ self.assertEqual(201, req.get_response(self.controller).status_int)
+ # PUT some objects
+ objects = [{'name': 'obj_%d' % i,
+ 'x-timestamp': next(ts_iter).normal,
+ 'x-content-type': 'text/plain',
+ 'x-etag': 'etag_%d' % i,
+ 'x-size': 1024 * i
+ } for i in range(2)]
+ for obj in objects:
+ req = Request.blank('/sda1/p/a/c/%s' % obj['name'], method='PUT',
+ headers=obj)
+ self._update_object_put_headers(req)
+ resp = req.get_response(self.controller)
+ self.assertEqual(201, resp.status_int)
+ # PUT some shard ranges
+ shard_bounds = [('', 'm', ShardRange.CLEAVED),
+ ('m', '', ShardRange.CREATED)]
+ shard_ranges = [
+ ShardRange('.sharded_a/_%s' % upper, next(ts_iter),
+ lower, upper,
+ i * 100, i * 1000, meta_timestamp=next(ts_iter),
+ state=state, state_timestamp=next(ts_iter))
+ for i, (lower, upper, state) in enumerate(shard_bounds)]
+ for shard_range in shard_ranges:
+ self._put_shard_range(shard_range)
+
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+
+ def assert_GET_objects(req, expected_objects):
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual(resp.content_type, 'application/json')
+ expected = [
+ dict(hash=obj['x-etag'], bytes=obj['x-size'],
+ content_type=obj['x-content-type'],
+ last_modified=Timestamp(obj['x-timestamp']).isoformat,
+ name=obj['name']) for obj in expected_objects]
+ self.assertEqual(expected, json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual(
+ 'object', resp.headers.pop('X-Backend-Record-Type'))
+ resp.headers.pop('Content-Length')
+ return resp
+
+ def assert_GET_shard_ranges(req, expected_shard_ranges):
+ with mock_timestamp_now(ts_now):
+ resp = req.get_response(self.controller)
+ self.assertEqual(resp.status_int, 200)
+ self.assertEqual(resp.content_type, 'application/json')
+ expected = [
+ dict(sr, last_modified=Timestamp(sr.timestamp).isoformat)
+ for sr in expected_shard_ranges]
+ self.assertEqual(expected, json.loads(resp.body))
+ self.assertIn('X-Backend-Record-Type', resp.headers)
+ self.assertEqual(
+ 'shard', resp.headers.pop('X-Backend-Record-Type'))
+ resp.headers.pop('Content-Length')
+ return resp
+
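+ # record type 'auto' should return objects while the broker is unsharded,
+ # and shard ranges once the broker has moved to sharding or sharded state
+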
+ # unsharded
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'auto'})
+ resp = assert_GET_objects(req, objects)
+ headers = resp.headers
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'shard'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'object'})
+ resp = assert_GET_objects(req, objects)
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET')
+ resp = assert_GET_objects(req, objects)
+ self.assertEqual(headers, resp.headers)
+
+ # move to sharding state
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'auto'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ headers = resp.headers
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'shard'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'object'})
+ resp = assert_GET_objects(req, objects)
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET')
+ resp = assert_GET_objects(req, objects)
+ self.assertEqual(headers, resp.headers)
+
+ # limit is applied to objects but not shard ranges
+ req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET',
+ headers={'X-Backend-Record-Type': 'auto'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ headers = resp.headers
+ req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET',
+ headers={'X-Backend-Record-Type': 'shard'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET',
+ headers={'X-Backend-Record-Type': 'object'})
+ resp = assert_GET_objects(req, objects[:1])
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET')
+ resp = assert_GET_objects(req, objects[:1])
+ self.assertEqual(headers, resp.headers)
+
+ # move to sharded state
+ self.assertTrue(broker.set_sharded_state())
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'auto'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ headers = resp.headers
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'shard'})
+ resp = assert_GET_shard_ranges(req, shard_ranges)
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET',
+ headers={'X-Backend-Record-Type': 'object'})
+ resp = assert_GET_objects(req, [])
+ self.assertEqual(headers, resp.headers)
+ req = Request.blank('/sda1/p/a/c?format=json', method='GET')
+ resp = assert_GET_objects(req, [])
+ self.assertEqual(headers, resp.headers)
+
+ def test_PUT_GET_to_sharding_container(self):
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ ts_iter = make_timestamp_iter()
+ headers = {'X-Timestamp': next(ts_iter).normal}
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers)
+ self.assertEqual(201, req.get_response(self.controller).status_int)
+
+ def do_update(name, timestamp=None, headers=None):
+ # Make a PUT request to container controller to update an object
+ timestamp = timestamp or next(ts_iter)
+ headers = headers or {}
+ headers.update({'X-Timestamp': timestamp.internal,
+ 'X-Size': 17,
+ 'X-Content-Type': 'text/plain',
+ 'X-Etag': 'fake etag'})
+ req = Request.blank(
+ '/sda1/p/a/c/%s' % name, method='PUT', headers=headers)
+ self._update_object_put_headers(req)
+ resp = req.get_response(self.controller)
+ self.assertEqual(201, resp.status_int)
+
+ def get_api_listing():
+ req = Request.blank(
+ '/sda1/p/a/c', method='GET', params={'format': 'json'})
+ resp = req.get_response(self.controller)
+ self.assertEqual(200, resp.status_int)
+ return [obj['name'] for obj in json.loads(resp.body)]
+
+ def assert_broker_rows(broker, expected_names, expected_max_row):
+ self.assertEqual(expected_max_row, broker.get_max_row())
+ with broker.get() as conn:
+ curs = conn.execute('''
+ SELECT * FROM object WHERE ROWID > -1 ORDER BY ROWID ASC
+ ''')
+ actual = [r[1] for r in curs]
+
+ self.assertEqual(expected_names, actual)
+
+ do_update('unsharded')
+ self.assertEqual(['unsharded'], get_api_listing())
+ assert_broker_rows(broker, ['unsharded'], 1)
+
+ # move container to sharding state
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
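+ # get_brokers()[0] is the retiring db, get_brokers()[-1] is the fresh db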
+ assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1)
+ assert_broker_rows(broker.get_brokers()[1], [], 1)
+
+ # add another update - should not merge into the older db and therefore
+ # not appear in api listing
+ do_update('sharding')
+ self.assertEqual(['unsharded'], get_api_listing())
+ assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1)
+ assert_broker_rows(broker.get_brokers()[1], ['sharding'], 2)
+
+ orig_lister = swift.container.backend.ContainerBroker.list_objects_iter
+
+ def mock_list_objects_iter(*args, **kwargs):
+ # cause an update to land in the pending file after it has been
+ # flushed by get_info() calls in the container PUT method, but
+ # before it is flushed by the call to list_objects_iter
+ do_update('racing_update')
+ return orig_lister(*args, **kwargs)
+
+ with mock.patch(
+ 'swift.container.backend.ContainerBroker.list_objects_iter',
+ mock_list_objects_iter):
+ listing = get_api_listing()
+
+ self.assertEqual(['unsharded'], listing)
+ assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1)
+ assert_broker_rows(broker.get_brokers()[1], ['sharding'], 2)
+
+ # next listing will flush pending file
+ listing = get_api_listing()
+ self.assertEqual(['unsharded'], listing)
+ assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1)
+ assert_broker_rows(broker.get_brokers()[1],
+ ['sharding', 'racing_update'], 3)
+
+ def _check_object_update_redirected_to_shard(self, method):
+ expected_status = 204 if method == 'DELETE' else 201
+ broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c')
+ ts_iter = make_timestamp_iter()
+ headers = {'X-Timestamp': next(ts_iter).normal}
+ req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers)
+ self.assertEqual(201, req.get_response(self.controller).status_int)
+
+ def do_update(name, timestamp=None, headers=None):
+ # Make a PUT or DELETE request to container controller to update an
+ # object
+ timestamp = timestamp or next(ts_iter)
+ headers = headers or {}
+ headers.update({'X-Timestamp': timestamp.internal,
+ 'X-Size': 17,
+ 'X-Content-Type': 'text/plain',
+ 'X-Etag': 'fake etag'})
+ req = Request.blank(
+ '/sda1/p/a/c/%s' % name, method=method, headers=headers)
+ self._update_object_put_headers(req)
+ return req.get_response(self.controller)
+
+ def get_listing(broker_index):
+ # index -1 is always the freshest db
+ sub_broker = broker.get_brokers()[broker_index]
+ return sub_broker.get_objects()
+
+ def assert_not_redirected(obj_name, timestamp=None, headers=None):
+ resp = do_update(obj_name, timestamp=timestamp, headers=headers)
+ self.assertEqual(expected_status, resp.status_int)
+ self.assertNotIn('Location', resp.headers)
+ self.assertNotIn('X-Backend-Redirect-Timestamp', resp.headers)
+
+ def assert_redirected(obj_name, shard_range, headers=None):
+ resp = do_update(obj_name, headers=headers)
+ self.assertEqual(301, resp.status_int)
+ self.assertEqual('/%s/%s' % (shard_range.name, obj_name),
+ resp.headers['Location'])
+ self.assertEqual(shard_range.timestamp.internal,
+ resp.headers['X-Backend-Redirect-Timestamp'])
+
+ # sanity check
+ ts_bashful_orig = next(ts_iter)
+ mocked_fn = 'swift.container.backend.ContainerBroker.get_shard_ranges'
+ with mock.patch(mocked_fn) as mock_get_shard_ranges:
+ assert_not_redirected('bashful', ts_bashful_orig)
+ mock_get_shard_ranges.assert_not_called()
+
+ shard_ranges = {
+ 'dopey': ShardRange(
+ '.sharded_a/sr_dopey', next(ts_iter), '', 'dopey'),
+ 'happy': ShardRange(
+ '.sharded_a/sr_happy', next(ts_iter), 'dopey', 'happy'),
+ '': ShardRange('.sharded_a/sr_', next(ts_iter), 'happy', '')
+ }
+ # start with only the middle shard range
+ self._put_shard_range(shard_ranges['happy'])
+
+ # db not yet sharding but shard ranges exist
+ sr_happy = shard_ranges['happy']
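+ # object updates should be redirected only while the covering shard
+ # range is in one of these states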
+ redirect_states = (
+ ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE,
+ ShardRange.SHARDING)
+ headers = {'X-Backend-Accept-Redirect': 'true'}
+ for state in ShardRange.STATES:
+ self.assertTrue(
+ sr_happy.update_state(state,
+ state_timestamp=next(ts_iter)))
+ self._put_shard_range(sr_happy)
+ with annotate_failure(state):
+ obj_name = 'grumpy%s' % state
+ if state in redirect_states:
+ assert_redirected(obj_name, sr_happy, headers=headers)
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ else:
+ assert_not_redirected(obj_name, headers=headers)
+ self.assertIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ obj_name = 'grumpy%s_no_header' % state
+ with mock.patch(mocked_fn) as mock_get_shard_ranges:
+ assert_not_redirected(obj_name)
+ mock_get_shard_ranges.assert_not_called()
+ self.assertIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+
+ # set broker to sharding state
+ broker.enable_sharding(next(ts_iter))
+ self.assertTrue(broker.set_sharding_state())
+ for state in ShardRange.STATES:
+ self.assertTrue(
+ sr_happy.update_state(state,
+ state_timestamp=next(ts_iter)))
+ self._put_shard_range(sr_happy)
+ with annotate_failure(state):
+ obj_name = 'grumpier%s' % state
+ if state in redirect_states:
+ assert_redirected(obj_name, sr_happy, headers=headers)
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ else:
+ assert_not_redirected(obj_name, headers=headers)
+ # update goes to fresh db, misplaced
+ self.assertIn(
+ obj_name, [obj['name'] for obj in get_listing(-1)])
+ self.assertNotIn(
+ obj_name, [obj['name'] for obj in get_listing(0)])
+ obj_name = 'grumpier%s_no_header' % state
+ with mock.patch(mocked_fn) as mock_get_shard_ranges:
+ assert_not_redirected(obj_name)
+ mock_get_shard_ranges.assert_not_called()
+ self.assertIn(
+ obj_name, [obj['name'] for obj in get_listing(-1)])
+ # update is misplaced, not in retiring db
+ self.assertNotIn(
+ obj_name, [obj['name'] for obj in get_listing(0)])
+
+ # no shard for this object yet so it is accepted by root container
+ # and stored in misplaced objects...
+ assert_not_redirected('dopey', timestamp=next(ts_iter))
+ self.assertIn('dopey', [obj['name'] for obj in get_listing(-1)])
+ self.assertNotIn('dopey', [obj['name'] for obj in get_listing(0)])
+
+ # now PUT the first shard range
+ sr_dopey = shard_ranges['dopey']
+ sr_dopey.update_state(ShardRange.CLEAVED,
+ state_timestamp=next(ts_iter))
+ self._put_shard_range(sr_dopey)
+ for state in ShardRange.STATES:
+ self.assertTrue(
+ sr_happy.update_state(state,
+ state_timestamp=next(ts_iter)))
+ self._put_shard_range(sr_happy)
+ with annotate_failure(state):
+ obj_name = 'dopey%s' % state
+ if state in redirect_states:
+ assert_redirected(obj_name, sr_happy, headers=headers)
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(0)])
+ else:
+ assert_not_redirected(obj_name, headers=headers)
+ self.assertIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(0)])
+ obj_name = 'dopey%s_no_header' % state
+ with mock.patch(mocked_fn) as mock_get_shard_ranges:
+ assert_not_redirected(obj_name)
+ mock_get_shard_ranges.assert_not_called()
+ self.assertIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(0)])
+
+ # further updates to bashful and dopey are now redirected...
+ assert_redirected('bashful', sr_dopey, headers=headers)
+ assert_redirected('dopey', sr_dopey, headers=headers)
+ # ...and existing updates in this container are *not* updated
+ self.assertEqual([ts_bashful_orig.internal],
+ [obj['created_at'] for obj in get_listing(0)
+ if obj['name'] == 'bashful'])
+
+ # set broker to sharded state
+ self.assertTrue(broker.set_sharded_state())
+ for state in ShardRange.STATES:
+ self.assertTrue(
+ sr_happy.update_state(state,
+ state_timestamp=next(ts_iter)))
+ self._put_shard_range(sr_happy)
+ with annotate_failure(state):
+ obj_name = 'grumpiest%s' % state
+ if state in redirect_states:
+ assert_redirected(obj_name, sr_happy, headers=headers)
+ self.assertNotIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ else:
+ assert_not_redirected(obj_name, headers=headers)
+ self.assertIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+ obj_name = 'grumpiest%s_no_header' % state
+ with mock.patch(mocked_fn) as mock_get_shard_ranges:
+ assert_not_redirected(obj_name)
+ mock_get_shard_ranges.assert_not_called()
+ self.assertIn(obj_name,
+ [obj['name'] for obj in get_listing(-1)])
+
+ def test_PUT_object_update_redirected_to_shard(self):
+ self._check_object_update_redirected_to_shard('PUT')
+
+ def test_DELETE_object_update_redirected_to_shard(self):
+ self._check_object_update_redirected_to_shard('DELETE')
+
def test_GET_json(self):
# make a container
req = Request.blank(
@@ -2389,7 +3621,7 @@ class TestContainerController(unittest.TestCase):
req = Request.blank(
'/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT',
'HTTP_X_TIMESTAMP': '0'})
- resp = req.get_response(self.controller)
+ req.get_response(self.controller)
# fill the container
for i in range(3):
req = Request.blank(
@@ -2407,6 +3639,24 @@ class TestContainerController(unittest.TestCase):
resp = req.get_response(self.controller)
result = resp.body.split()
self.assertEqual(result, ['2', ])
+ # test limit with end_marker
+ req = Request.blank('/sda1/p/a/c?limit=2&end_marker=1',
+ environ={'REQUEST_METHOD': 'GET'})
+ resp = req.get_response(self.controller)
+ result = resp.body.split()
+ self.assertEqual(result, ['0', ])
+ # test limit, reverse with end_marker
+ req = Request.blank('/sda1/p/a/c?limit=2&end_marker=1&reverse=True',
+ environ={'REQUEST_METHOD': 'GET'})
+ resp = req.get_response(self.controller)
+ result = resp.body.split()
+ self.assertEqual(result, ['2', ])
+ # test marker > end_marker
+ req = Request.blank('/sda1/p/a/c?marker=2&end_marker=1',
+ environ={'REQUEST_METHOD': 'GET'})
+ resp = req.get_response(self.controller)
+ result = resp.body.split()
+ self.assertEqual(result, [])
def test_weird_content_types(self):
snowman = u'\u2603'
diff --git a/test/unit/container/test_sharder.py b/test/unit/container/test_sharder.py
new file mode 100644
index 0000000000..353d980bbf
--- /dev/null
+++ b/test/unit/container/test_sharder.py
@@ -0,0 +1,4580 @@
+# Copyright (c) 2010-2017 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import hashlib
+import json
+import random
+
+import eventlet
+import os
+import shutil
+from contextlib import contextmanager
+from tempfile import mkdtemp
+
+import mock
+import unittest
+
+from collections import defaultdict
+
+import time
+
+from copy import deepcopy
+
+from swift.common import internal_client
+from swift.container import replicator
+from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING, \
+ SHARDED, DATADIR
+from swift.container.sharder import ContainerSharder, sharding_enabled, \
+ CleavingContext, DEFAULT_SHARD_SHRINK_POINT, \
+ DEFAULT_SHARD_CONTAINER_THRESHOLD
+from swift.common.utils import ShardRange, Timestamp, hash_path, \
+ encode_timestamps, parse_db_filename, quorum_size, Everything
+from test import annotate_failure
+
+from test.unit import FakeLogger, debug_logger, FakeRing, \
+ make_timestamp_iter, unlink_files, mocked_http_conn, mock_timestamp_now, \
+ attach_fake_replication_rpc
+
+
+class BaseTestSharder(unittest.TestCase):
+ def setUp(self):
+ self.tempdir = mkdtemp()
+ self.ts_iter = make_timestamp_iter()
+
+ def tearDown(self):
+ shutil.rmtree(self.tempdir, ignore_errors=True)
+
+ def _assert_shard_ranges_equal(self, expected, actual):
+ self.assertEqual([dict(sr) for sr in expected],
+ [dict(sr) for sr in actual])
+
+ def _make_broker(self, account='a', container='c', epoch=None,
+ device='sda', part=0, hash_=None):
+ hash_ = hash_ or hashlib.md5(container).hexdigest()
+ datadir = os.path.join(
+ self.tempdir, device, 'containers', str(part), hash_[-3:], hash_)
+ if epoch:
+ filename = '%s_%s.db' % (hash_, epoch)
+ else:
+ filename = hash_ + '.db'
+ db_file = os.path.join(datadir, filename)
+ broker = ContainerBroker(
+ db_file, account=account, container=container,
+ logger=debug_logger())
+ broker.initialize()
+ return broker
+
+ def _make_sharding_broker(self, account='a', container='c',
+ shard_bounds=(('', 'middle'), ('middle', ''))):
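+ # helper: make a broker that has already moved to the SHARDING db state
+ # with CLEAVED shard ranges merged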
+ broker = self._make_broker(account=account, container=container)
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ old_db_id = broker.get_info()['id']
+ broker.enable_sharding(next(self.ts_iter))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CLEAVED)
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ broker = ContainerBroker(broker.db_file, account='a', container='c')
+ self.assertNotEqual(old_db_id, broker.get_info()['id']) # sanity check
+ return broker
+
+ def _make_shard_ranges(self, bounds, state=None, object_count=0):
+ return [ShardRange('.shards_a/c_%s' % upper, Timestamp.now(),
+ lower, upper, state=state,
+ object_count=object_count)
+ for lower, upper in bounds]
+
+ def ts_encoded(self):
+ # make a unique timestamp string with multiple timestamps encoded;
+ # use different deltas between component timestamps
+ timestamps = [next(self.ts_iter) for i in range(4)]
+ return encode_timestamps(
+ timestamps[0], timestamps[1], timestamps[3])
+
+
+class TestSharder(BaseTestSharder):
+ def test_init(self):
+ def do_test(conf, expected):
+ with mock.patch(
+ 'swift.container.sharder.internal_client.InternalClient') \
+ as mock_ic:
+ with mock.patch('swift.common.db_replicator.ring.Ring') \
+ as mock_ring:
+ mock_ring.return_value = mock.MagicMock()
+ mock_ring.return_value.replica_count = 3
+ sharder = ContainerSharder(conf)
+ mock_ring.assert_called_once_with(
+ '/etc/swift', ring_name='container')
+ self.assertEqual(
+ 'container-sharder', sharder.logger.logger.name)
+ for k, v in expected.items():
+ self.assertTrue(hasattr(sharder, k), 'Missing attr %s' % k)
+ self.assertEqual(v, getattr(sharder, k),
+ 'Incorrect value: expected %s=%s but got %s' %
+ (k, v, getattr(sharder, k)))
+ return mock_ic
+
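+ # an empty conf should fall back to these defaults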
+ expected = {
+ 'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201,
+ 'per_diff': 1000, 'max_diffs': 100, 'interval': 30,
+ 'cleave_row_batch_size': 10000,
+ 'node_timeout': 10, 'conn_timeout': 5,
+ 'rsync_compress': False,
+ 'rsync_module': '{replication_ip}::container',
+ 'reclaim_age': 86400 * 7,
+ 'shard_shrink_point': 0.25,
+ 'shrink_merge_point': 0.75,
+ 'shard_container_threshold': 10000000,
+ 'split_size': 5000000,
+ 'cleave_batch_size': 2,
+ 'scanner_batch_size': 10,
+ 'rcache': '/var/cache/swift/container.recon',
+ 'shards_account_prefix': '.shards_',
+ 'auto_shard': False,
+ 'recon_candidates_limit': 5,
+ 'shard_replication_quorum': 2,
+ 'existing_shard_replication_quorum': 2
+ }
+ mock_ic = do_test({}, expected)
+ mock_ic.assert_called_once_with(
+ '/etc/swift/internal-client.conf', 'Swift Container Sharder', 3,
+ allow_modify_pipeline=False)
+
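+ # explicit conf values should be parsed and normalised (e.g. percentages
+ # become fractions, trailing '/' is stripped from rsync_module)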
+ conf = {
+ 'mount_check': False, 'bind_ip': '10.11.12.13', 'bind_port': 62010,
+ 'per_diff': 2000, 'max_diffs': 200, 'interval': 60,
+ 'cleave_row_batch_size': 3000,
+ 'node_timeout': 20, 'conn_timeout': 1,
+ 'rsync_compress': True,
+ 'rsync_module': '{replication_ip}::container_sda/',
+ 'reclaim_age': 86400 * 14,
+ 'shard_shrink_point': 35,
+ 'shard_shrink_merge_point': 85,
+ 'shard_container_threshold': 20000000,
+ 'cleave_batch_size': 4,
+ 'shard_scanner_batch_size': 8,
+ 'request_tries': 2,
+ 'internal_client_conf_path': '/etc/swift/my-sharder-ic.conf',
+ 'recon_cache_path': '/var/cache/swift-alt',
+ 'auto_create_account_prefix': '...',
+ 'auto_shard': 'yes',
+ 'recon_candidates_limit': 10,
+ 'shard_replication_quorum': 1,
+ 'existing_shard_replication_quorum': 0
+ }
+ expected = {
+ 'mount_check': False, 'bind_ip': '10.11.12.13', 'port': 62010,
+ 'per_diff': 2000, 'max_diffs': 200, 'interval': 60,
+ 'cleave_row_batch_size': 3000,
+ 'node_timeout': 20, 'conn_timeout': 1,
+ 'rsync_compress': True,
+ 'rsync_module': '{replication_ip}::container_sda',
+ 'reclaim_age': 86400 * 14,
+ 'shard_shrink_point': 0.35,
+ 'shrink_merge_point': 0.85,
+ 'shard_container_threshold': 20000000,
+ 'split_size': 10000000,
+ 'cleave_batch_size': 4,
+ 'scanner_batch_size': 8,
+ 'rcache': '/var/cache/swift-alt/container.recon',
+ 'shards_account_prefix': '...shards_',
+ 'auto_shard': True,
+ 'recon_candidates_limit': 10,
+ 'shard_replication_quorum': 1,
+ 'existing_shard_replication_quorum': 0
+ }
+ mock_ic = do_test(conf, expected)
+ mock_ic.assert_called_once_with(
+ '/etc/swift/my-sharder-ic.conf', 'Swift Container Sharder', 2,
+ allow_modify_pipeline=False)
+
+ expected.update({'shard_replication_quorum': 3,
+ 'existing_shard_replication_quorum': 3})
+ conf.update({'shard_replication_quorum': 4,
+ 'existing_shard_replication_quorum': 4})
+ do_test(conf, expected)
+
+ with self.assertRaises(ValueError) as cm:
+ do_test({'shard_shrink_point': 101}, {})
+ self.assertIn(
+ 'greater than 0, less than 100, not "101"', cm.exception.message)
+ self.assertIn('shard_shrink_point', cm.exception.message)
+
+ with self.assertRaises(ValueError) as cm:
+ do_test({'shard_shrink_merge_point': 101}, {})
+ self.assertIn(
+ 'greater than 0, less than 100, not "101"', cm.exception.message)
+ self.assertIn('shard_shrink_merge_point', cm.exception.message)
+
+ def test_init_internal_client_conf_loading_error(self):
+ with mock.patch('swift.common.db_replicator.ring.Ring') \
+ as mock_ring:
+ mock_ring.return_value = mock.MagicMock()
+ mock_ring.return_value.replica_count = 3
+ with self.assertRaises(SystemExit) as cm:
+ ContainerSharder(
+ {'internal_client_conf_path':
+ os.path.join(self.tempdir, 'nonexistent')})
+ self.assertIn('Unable to load internal client', str(cm.exception))
+
+ with mock.patch('swift.common.db_replicator.ring.Ring') \
+ as mock_ring:
+ mock_ring.return_value = mock.MagicMock()
+ mock_ring.return_value.replica_count = 3
+ with mock.patch(
+ 'swift.container.sharder.internal_client.InternalClient',
+ side_effect=Exception('kaboom')):
+ with self.assertRaises(Exception) as cm:
+ ContainerSharder({})
+ self.assertIn('kaboom', str(cm.exception))
+
+ def _assert_stats(self, expected, sharder, category):
+ # assertEqual doesn't work with a defaultdict
+ stats = sharder.stats['sharding'][category]
+ for k, v in expected.items():
+ actual = stats[k]
+ self.assertEqual(
+ v, actual, 'Expected %s but got %s for %s in %s' %
+ (v, actual, k, stats))
+ return stats
+
+ def _assert_recon_stats(self, expected, sharder, category):
+ with open(sharder.rcache, 'rb') as fd:
+ recon = json.load(fd)
+ stats = recon['sharding_stats']['sharding'].get(category)
+ self.assertEqual(expected, stats)
+
+ def test_increment_stats(self):
+ with self._mock_sharder() as sharder:
+ sharder._increment_stat('visited', 'success')
+ sharder._increment_stat('visited', 'success')
+ sharder._increment_stat('visited', 'failure')
+ sharder._increment_stat('visited', 'completed')
+ sharder._increment_stat('cleaved', 'success')
+ sharder._increment_stat('scanned', 'found', step=4)
+ expected = {'success': 2,
+ 'failure': 1,
+ 'completed': 1}
+ self._assert_stats(expected, sharder, 'visited')
+ self._assert_stats({'success': 1}, sharder, 'cleaved')
+ self._assert_stats({'found': 4}, sharder, 'scanned')
+
+ def test_increment_stats_with_statsd(self):
+ with self._mock_sharder() as sharder:
+ sharder._increment_stat('visited', 'success', statsd=True)
+ sharder._increment_stat('visited', 'success', statsd=True)
+ sharder._increment_stat('visited', 'failure', statsd=True)
+ sharder._increment_stat('visited', 'failure', statsd=False)
+ sharder._increment_stat('visited', 'completed')
+ expected = {'success': 2,
+ 'failure': 2,
+ 'completed': 1}
+ self._assert_stats(expected, sharder, 'visited')
+ counts = sharder.logger.get_increment_counts()
+ self.assertEqual(2, counts.get('visited_success'))
+ self.assertEqual(1, counts.get('visited_failure'))
+ self.assertIsNone(counts.get('visited_completed'))
+
+ def test_run_forever(self):
+ conf = {'recon_cache_path': self.tempdir,
+ 'devices': self.tempdir}
+ with self._mock_sharder(conf) as sharder:
+ sharder._check_node = lambda *args: True
+ sharder.logger.clear()
+ brokers = []
+ for container in ('c1', 'c2'):
+ broker = self._make_broker(
+ container=container, hash_=container + 'hash',
+ device=sharder.ring.devs[0]['device'], part=0)
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ ('true', next(self.ts_iter).internal)})
+ brokers.append(broker)
+
+ fake_stats = {
+ 'scanned': {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 2, 'min_time': 99, 'max_time': 123},
+ 'created': {'attempted': 1, 'success': 1, 'failure': 1},
+ 'cleaved': {'attempted': 1, 'success': 1, 'failure': 0,
+ 'min_time': 0.01, 'max_time': 1.3},
+ 'misplaced': {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 1, 'unplaced': 0},
+ 'audit_root': {'attempted': 5, 'success': 4, 'failure': 1},
+ 'audit_shard': {'attempted': 2, 'success': 2, 'failure': 0},
+ }
+ # NB these are time increments not absolute times...
+ fake_periods = [1, 2, 3, 3600, 4, 15, 15, 0]
+ fake_periods_iter = iter(fake_periods)
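+ # capture recon dumps and visited brokers so the daemon loop can be
+ # inspected after it is forced to exit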
+ recon_data = []
+ fake_process_broker_calls = []
+
+ def mock_dump_recon_cache(data, *args):
+ recon_data.append(deepcopy(data))
+
+ with mock.patch('swift.container.sharder.time.time') as fake_time:
+ def fake_process_broker(broker, *args, **kwargs):
+ # increment time and inject some fake stats
+ fake_process_broker_calls.append((broker, args, kwargs))
+ try:
+ fake_time.return_value += next(fake_periods_iter)
+ except StopIteration:
+ # bail out
+ fake_time.side_effect = Exception('Test over')
+ sharder.stats['sharding'].update(fake_stats)
+
+ with mock.patch(
+ 'swift.container.sharder.time.sleep') as mock_sleep:
+ with mock.patch(
+ 'swift.container.sharder.is_sharding_candidate',
+ return_value=True):
+ with mock.patch(
+ 'swift.container.sharder.dump_recon_cache',
+ mock_dump_recon_cache):
+ fake_time.return_value = next(fake_periods_iter)
+ sharder._is_sharding_candidate = lambda x: True
+ sharder._process_broker = fake_process_broker
+ with self.assertRaises(Exception) as cm:
+ sharder.run_forever()
+
+ self.assertEqual('Test over', cm.exception.message)
+ # four cycles are started, two brokers visited per cycle, but
+ # fourth never completes
+ self.assertEqual(8, len(fake_process_broker_calls))
+ # expect initial random sleep then one sleep between first and
+ # second pass
+ self.assertEqual(2, mock_sleep.call_count)
+ self.assertLessEqual(mock_sleep.call_args_list[0][0][0], 30)
+ self.assertLessEqual(mock_sleep.call_args_list[1][0][0],
+ 30 - fake_periods[0])
+
+ lines = sharder.logger.get_lines_for_level('info')
+ categories = ('visited', 'scanned', 'created', 'cleaved',
+ 'misplaced', 'audit_root', 'audit_shard')
+
+ def check_categories(start_time):
+ for category in categories:
+ line = lines.pop(0)
+ self.assertIn('Since %s' % time.ctime(start_time), line)
+ self.assertIn(category, line)
+ for k, v in fake_stats.get(category, {}).items():
+ self.assertIn('%s:%s' % (k, v), line)
+
+ def check_logs(cycle_time, start_time,
+ expect_periodic_stats=False):
+ self.assertIn('Container sharder cycle starting', lines.pop(0))
+ check_categories(start_time)
+ if expect_periodic_stats:
+ check_categories(start_time)
+ self.assertIn('Container sharder cycle completed: %.02fs' %
+ cycle_time, lines.pop(0))
+
+ check_logs(sum(fake_periods[1:3]), fake_periods[0])
+ check_logs(sum(fake_periods[3:5]), sum(fake_periods[:3]),
+ expect_periodic_stats=True)
+ check_logs(sum(fake_periods[5:7]), sum(fake_periods[:5]))
+ # final cycle start but then exception pops to terminate test
+ self.assertIn('Container sharder cycle starting', lines.pop(0))
+ self.assertFalse(lines)
+ lines = sharder.logger.get_lines_for_level('error')
+ self.assertIn(
+ 'Unhandled exception while dumping progress', lines[0])
+ self.assertIn('Test over', lines[0])
+
+ def check_recon(data, time, last, expected_stats):
+ self.assertEqual(time, data['sharding_time'])
+ self.assertEqual(last, data['sharding_last'])
+ self.assertEqual(
+ expected_stats, dict(data['sharding_stats']['sharding']))
+
+ def stats_for_candidate(broker):
+ return {'object_count': 0,
+ 'account': broker.account,
+ 'meta_timestamp': mock.ANY,
+ 'container': broker.container,
+ 'file_size': os.stat(broker.db_file).st_size,
+ 'path': broker.db_file,
+ 'root': broker.path,
+ 'node_index': 0}
+
+ self.assertEqual(4, len(recon_data))
+ # stats report at end of first cycle
+ fake_stats.update({'visited': {'attempted': 2, 'skipped': 0,
+ 'success': 2, 'failure': 0,
+ 'completed': 0}})
+ fake_stats.update({
+ 'sharding_candidates': {
+ 'found': 2,
+ 'top': [stats_for_candidate(call[0])
+ for call in fake_process_broker_calls[:2]]
+ }
+ })
+ check_recon(recon_data[0], sum(fake_periods[1:3]),
+ sum(fake_periods[:3]), fake_stats)
+ # periodic stats report after first broker has been visited during
+ # second cycle - one candidate identified so far this cycle
+ fake_stats.update({'visited': {'attempted': 1, 'skipped': 0,
+ 'success': 1, 'failure': 0,
+ 'completed': 0}})
+ fake_stats.update({
+ 'sharding_candidates': {
+ 'found': 1,
+ 'top': [stats_for_candidate(call[0])
+ for call in fake_process_broker_calls[2:3]]
+ }
+ })
+ check_recon(recon_data[1], fake_periods[3],
+ sum(fake_periods[:4]), fake_stats)
+ # stats report at end of second cycle - both candidates reported
+ fake_stats.update({'visited': {'attempted': 2, 'skipped': 0,
+ 'success': 2, 'failure': 0,
+ 'completed': 0}})
+ fake_stats.update({
+ 'sharding_candidates': {
+ 'found': 2,
+ 'top': [stats_for_candidate(call[0])
+ for call in fake_process_broker_calls[2:4]]
+ }
+ })
+ check_recon(recon_data[2], sum(fake_periods[3:5]),
+ sum(fake_periods[:5]), fake_stats)
+ # stats report at end of third cycle
+ fake_stats.update({'visited': {'attempted': 2, 'skipped': 0,
+ 'success': 2, 'failure': 0,
+ 'completed': 0}})
+ fake_stats.update({
+ 'sharding_candidates': {
+ 'found': 2,
+ 'top': [stats_for_candidate(call[0])
+ for call in fake_process_broker_calls[4:6]]
+ }
+ })
+ check_recon(recon_data[3], sum(fake_periods[5:7]),
+ sum(fake_periods[:7]), fake_stats)
+
+ def test_one_shard_cycle(self):
+ conf = {'recon_cache_path': self.tempdir,
+ 'devices': self.tempdir,
+ 'shard_container_threshold': 9}
+ with self._mock_sharder(conf) as sharder:
+ sharder._check_node = lambda *args: True
+ sharder.reported = time.time()
+ sharder.logger = debug_logger()
+ brokers = []
+ device_ids = set(range(3))
+ for device_id in device_ids:
+ brokers.append(self._make_broker(
+ container='c%s' % device_id, hash_='c%shash' % device_id,
+ device=sharder.ring.devs[device_id]['device'], part=0))
+ # enable sharding for a/c1 and a/c2
+ for broker in brokers[1:]:
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ ('true', next(self.ts_iter).internal)})
+ # make a/c1 a candidate for sharding
+ for i in range(10):
+ brokers[1].put_object('o%s' % i, next(self.ts_iter).internal,
+ 0, 'text/plain', 'etag', 0)
+
+ # check only sharding enabled containers are processed
+ with mock.patch.object(
+ sharder, '_process_broker'
+ ) as mock_process_broker:
+ sharder._local_device_ids = {'stale_node_id'}
+ sharder._one_shard_cycle(Everything(), Everything())
+
+ self.assertEqual(device_ids, sharder._local_device_ids)
+ self.assertEqual(2, mock_process_broker.call_count)
+ processed_paths = [call[0][0].path
+ for call in mock_process_broker.call_args_list]
+ self.assertEqual({'a/c1', 'a/c2'}, set(processed_paths))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ expected_stats = {'attempted': 2, 'success': 2, 'failure': 0,
+ 'skipped': 1, 'completed': 0}
+ self._assert_recon_stats(expected_stats, sharder, 'visited')
+ expected_candidate_stats = {
+ 'found': 1,
+ 'top': [{'object_count': 10, 'account': 'a', 'container': 'c1',
+ 'meta_timestamp': mock.ANY,
+ 'file_size': os.stat(brokers[1].db_file).st_size,
+ 'path': brokers[1].db_file, 'root': 'a/c1',
+ 'node_index': 1}]}
+ self._assert_recon_stats(
+ expected_candidate_stats, sharder, 'sharding_candidates')
+ self._assert_recon_stats(None, sharder, 'sharding_progress')
+
+ # progress a/c0 and a/c1 by giving them shard ranges (having shard
+ # ranges also enables a/c0 for processing); make a/c2 a sharding
+ # candidate by adding objects
+ now = next(self.ts_iter)
+ brokers[0].merge_shard_ranges(
+ [ShardRange('a/c0', now, '', '', state=ShardRange.SHARDING),
+ ShardRange('.s_a/1', now, '', 'b', state=ShardRange.ACTIVE),
+ ShardRange('.s_a/2', now, 'b', 'c', state=ShardRange.CLEAVED),
+ ShardRange('.s_a/3', now, 'c', 'd', state=ShardRange.CREATED),
+ ShardRange('.s_a/4', now, 'd', 'e', state=ShardRange.CREATED),
+ ShardRange('.s_a/5', now, 'e', '', state=ShardRange.FOUND)])
+ brokers[1].merge_shard_ranges(
+ [ShardRange('a/c1', now, '', '', state=ShardRange.SHARDING),
+ ShardRange('.s_a/6', now, '', 'b', state=ShardRange.ACTIVE),
+ ShardRange('.s_a/7', now, 'b', 'c', state=ShardRange.ACTIVE),
+ ShardRange('.s_a/8', now, 'c', 'd', state=ShardRange.CLEAVED),
+ ShardRange('.s_a/9', now, 'd', 'e', state=ShardRange.CREATED),
+ ShardRange('.s_a/0', now, 'e', '', state=ShardRange.CREATED)])
+ for i in range(11):
+ brokers[2].put_object('o%s' % i, next(self.ts_iter).internal,
+ 0, 'text/plain', 'etag', 0)
+
+ def mock_processing(broker, node, part):
+ if broker.path == 'a/c1':
+ raise Exception('kapow!')
+ elif broker.path not in ('a/c0', 'a/c2'):
+ raise BaseException("I don't know how to handle a broker "
+ "for %s" % broker.path)
+
+ # check exceptions are handled
+ with mock.patch.object(
+ sharder, '_process_broker', side_effect=mock_processing
+ ) as mock_process_broker:
+ sharder._local_device_ids = {'stale_node_id'}
+ sharder._one_shard_cycle(Everything(), Everything())
+
+ self.assertEqual(device_ids, sharder._local_device_ids)
+ self.assertEqual(3, mock_process_broker.call_count)
+ processed_paths = [call[0][0].path
+ for call in mock_process_broker.call_args_list]
+ self.assertEqual({'a/c0', 'a/c1', 'a/c2'}, set(processed_paths))
+ lines = sharder.logger.get_lines_for_level('error')
+ self.assertIn('Unhandled exception while processing', lines[0])
+ self.assertFalse(lines[1:])
+ sharder.logger.clear()
+ expected_stats = {'attempted': 3, 'success': 2, 'failure': 1,
+ 'skipped': 0, 'completed': 0}
+ self._assert_recon_stats(expected_stats, sharder, 'visited')
+ expected_candidate_stats = {
+ 'found': 1,
+ 'top': [{'object_count': 11, 'account': 'a', 'container': 'c2',
+ 'meta_timestamp': mock.ANY,
+ 'file_size': os.stat(brokers[2].db_file).st_size,
+ 'path': brokers[2].db_file, 'root': 'a/c2',
+ 'node_index': 2}]}
+ self._assert_recon_stats(
+ expected_candidate_stats, sharder, 'sharding_candidates')
+ expected_in_progress_stats = {
+ 'all': [{'object_count': 0, 'account': 'a', 'container': 'c0',
+ 'meta_timestamp': mock.ANY,
+ 'file_size': os.stat(brokers[0].db_file).st_size,
+ 'path': brokers[0].db_file, 'root': 'a/c0',
+ 'node_index': 0,
+ 'found': 1, 'created': 2, 'cleaved': 1, 'active': 1,
+ 'state': 'sharding', 'db_state': 'unsharded',
+ 'error': None},
+ {'object_count': 10, 'account': 'a', 'container': 'c1',
+ 'meta_timestamp': mock.ANY,
+ 'file_size': os.stat(brokers[1].db_file).st_size,
+ 'path': brokers[1].db_file, 'root': 'a/c1',
+ 'node_index': 1,
+ 'found': 0, 'created': 2, 'cleaved': 1, 'active': 2,
+ 'state': 'sharding', 'db_state': 'unsharded',
+ 'error': 'kapow!'}]}
+ self._assert_stats(
+ expected_in_progress_stats, sharder, 'sharding_in_progress')
+
+ # check that candidates and in progress stats don't stick in recon
+ own_shard_range = brokers[0].get_own_shard_range()
+ own_shard_range.state = ShardRange.ACTIVE
+ brokers[0].merge_shard_ranges([own_shard_range])
+ for i in range(10):
+ brokers[1].delete_object(
+ 'o%s' % i, next(self.ts_iter).internal)
+ with mock.patch.object(
+ sharder, '_process_broker'
+ ) as mock_process_broker:
+ sharder._local_device_ids = {999}
+ sharder._one_shard_cycle(Everything(), Everything())
+
+ self.assertEqual(device_ids, sharder._local_device_ids)
+ self.assertEqual(3, mock_process_broker.call_count)
+ processed_paths = [call[0][0].path
+ for call in mock_process_broker.call_args_list]
+ self.assertEqual({'a/c0', 'a/c1', 'a/c2'}, set(processed_paths))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ expected_stats = {'attempted': 3, 'success': 3, 'failure': 0,
+ 'skipped': 0, 'completed': 0}
+ self._assert_recon_stats(expected_stats, sharder, 'visited')
+ self._assert_recon_stats(
+ expected_candidate_stats, sharder, 'sharding_candidates')
+ self._assert_recon_stats(None, sharder, 'sharding_progress')
+
+ @contextmanager
+ def _mock_sharder(self, conf=None, replicas=3):
+ conf = conf or {}
+ conf['devices'] = self.tempdir
+ with mock.patch(
+ 'swift.container.sharder.internal_client.InternalClient'):
+ with mock.patch(
+ 'swift.common.db_replicator.ring.Ring',
+ lambda *args, **kwargs: FakeRing(replicas=replicas)):
+ sharder = ContainerSharder(conf, logger=FakeLogger())
+ sharder._local_device_ids = {0, 1, 2}
+ sharder._replicate_object = mock.MagicMock(
+ return_value=(True, [True] * sharder.ring.replica_count))
+ yield sharder
+
+ def _get_raw_object_records(self, broker):
+ # use list_objects_iter with no-op transform_func to get back actual
+ # un-transformed rows with encoded timestamps
+ return [list(obj) for obj in broker.list_objects_iter(
+ 10, '', '', '', '', include_deleted=None, all_policies=True,
+ transform_func=lambda record: record)]
+
+ def _check_objects(self, expected_objs, shard_db):
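+ # the shard db should contain exactly the expected object rows, in order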
+ shard_broker = ContainerBroker(shard_db)
+ shard_objs = self._get_raw_object_records(shard_broker)
+ expected_objs = [list(obj) for obj in expected_objs]
+ self.assertEqual(expected_objs, shard_objs)
+
+ def _check_shard_range(self, expected, actual):
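+ # actual should match expected apart from having a newer meta_timestamp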
+ expected_dict = dict(expected)
+ actual_dict = dict(actual)
+ self.assertGreater(actual_dict.pop('meta_timestamp'),
+ expected_dict.pop('meta_timestamp'))
+ self.assertEqual(expected_dict, actual_dict)
+
+ def test_fetch_shard_ranges_unexpected_response(self):
+ broker = self._make_broker()
+ exc = internal_client.UnexpectedResponse(
+ 'Unexpected response: 404', None)
+ with self._mock_sharder() as sharder:
+ sharder.int_client.make_request.side_effect = exc
+ self.assertIsNone(sharder._fetch_shard_ranges(broker))
+ lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn('Unexpected response: 404', lines[0])
+ self.assertFalse(lines[1:])
+
+ def test_fetch_shard_ranges_bad_record_type(self):
+ def do_test(mock_resp_headers):
+ with self._mock_sharder() as sharder:
+ mock_make_request = mock.MagicMock(
+ return_value=mock.MagicMock(headers=mock_resp_headers))
+ sharder.int_client.make_request = mock_make_request
+ self.assertIsNone(sharder._fetch_shard_ranges(broker))
+ lines = sharder.logger.get_lines_for_level('error')
+ self.assertIn('unexpected record type', lines[0])
+ self.assertFalse(lines[1:])
+
+ broker = self._make_broker()
+ do_test({})
+ do_test({'x-backend-record-type': 'object'})
+ do_test({'x-backend-record-type': 'disco'})
+
+ def test_fetch_shard_ranges_bad_data(self):
+ def do_test(mock_resp_body):
+ mock_resp_headers = {'x-backend-record-type': 'shard'}
+ with self._mock_sharder() as sharder:
+ mock_make_request = mock.MagicMock(
+ return_value=mock.MagicMock(headers=mock_resp_headers,
+ body=mock_resp_body))
+ sharder.int_client.make_request = mock_make_request
+ self.assertIsNone(sharder._fetch_shard_ranges(broker))
+ lines = sharder.logger.get_lines_for_level('error')
+ self.assertIn('invalid data', lines[0])
+ self.assertFalse(lines[1:])
+
+ broker = self._make_broker()
+ do_test({})
+ do_test('')
+ do_test(json.dumps({}))
+ do_test(json.dumps([{'account': 'a', 'container': 'c'}]))
+
+ def test_fetch_shard_ranges_ok(self):
+ def do_test(mock_resp_body, params):
+ mock_resp_headers = {'x-backend-record-type': 'shard'}
+ with self._mock_sharder() as sharder:
+ mock_make_request = mock.MagicMock(
+ return_value=mock.MagicMock(headers=mock_resp_headers,
+ body=mock_resp_body))
+ sharder.int_client.make_request = mock_make_request
+ mock_make_path = mock.MagicMock(return_value='/v1/a/c')
+ sharder.int_client.make_path = mock_make_path
+ actual = sharder._fetch_shard_ranges(broker, params=params)
+ sharder.int_client.make_path.assert_called_once_with('a', 'c')
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ return actual, mock_make_request
+
+ expected_headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Backend-Include-Deleted': 'False',
+ 'X-Backend-Override-Deleted': 'true'}
+ broker = self._make_broker()
+ shard_ranges = self._make_shard_ranges((('', 'm'), ('m', '')))
+
+ params = {'format': 'json'}
+ actual, mock_call = do_test(json.dumps([dict(shard_ranges[0])]),
+ params={})
+ mock_call.assert_called_once_with(
+ 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,),
+ params=params)
+ self._assert_shard_ranges_equal([shard_ranges[0]], actual)
+
+ params = {'format': 'json', 'includes': 'thing'}
+ actual, mock_call = do_test(
+ json.dumps([dict(sr) for sr in shard_ranges]), params=params)
+ self._assert_shard_ranges_equal(shard_ranges, actual)
+ mock_call.assert_called_once_with(
+ 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,),
+ params=params)
+
+ params = {'format': 'json', 'end_marker': 'there', 'marker': 'here'}
+ actual, mock_call = do_test(json.dumps([]), params=params)
+ self._assert_shard_ranges_equal([], actual)
+ mock_call.assert_called_once_with(
+ 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,),
+ params=params)
+
+ def _check_cleave_root(self, conf=None):
+ broker = self._make_broker()
+ objects = [
+ # shard 0
+ ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0),
+ ('here', self.ts_encoded(), 10, 'text/plain', 'etag_here', 0, 0),
+ # shard 1
+ ('m', self.ts_encoded(), 1, 'text/plain', 'etag_m', 0, 0),
+ ('n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0),
+ ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0),
+ # shard 2
+ ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0,
+ 0),
+ # shard 3
+ ('x', self.ts_encoded(), 0, '', '', 1, 0), # deleted
+ ('y', self.ts_encoded(), 1000, 'text/plain', 'etag_y', 0, 0),
+ # shard 4
+ ('yyyy', self.ts_encoded(), 14, 'text/plain', 'etag_yyyy', 0, 0),
+ ]
+ for obj in objects:
+ broker.put_object(*obj)
+ initial_root_info = broker.get_info()
+ broker.enable_sharding(Timestamp.now())
+
+ shard_bounds = (('', 'here'), ('here', 'there'),
+ ('there', 'where'), ('where', 'yonder'),
+ ('yonder', ''))
+ shard_ranges = self._make_shard_ranges(shard_bounds)
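+ # compute the db file path that cleaving is expected to create for each
+ # shard range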
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+
+ # used to accumulate stats from sharded dbs
+ total_shard_stats = {'object_count': 0, 'bytes_used': 0}
+ # run cleave - no shard ranges, nothing happens
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertFalse(sharder._cleave(broker))
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(0, context.ranges_done)
+ self.assertEqual(0, context.ranges_todo)
+
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+ sharder._replicate_object.assert_not_called()
+ for db in expected_shard_dbs:
+ with annotate_failure(db):
+ self.assertFalse(os.path.exists(db))
+
+ # run cleave - all shard ranges in found state, nothing happens
+ broker.merge_shard_ranges(shard_ranges[:4])
+ self.assertTrue(broker.set_sharding_state())
+
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertFalse(sharder._cleave(broker))
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(0, context.ranges_done)
+ self.assertEqual(4, context.ranges_todo)
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_not_called()
+ for db in expected_shard_dbs:
+ with annotate_failure(db):
+ self.assertFalse(os.path.exists(db))
+ for shard_range in broker.get_shard_ranges():
+ with annotate_failure(shard_range):
+ self.assertEqual(ShardRange.FOUND, shard_range.state)
+
+ # move the first shard range to CREATED state; it should then be cleaved
+ shard_ranges[0].update_state(ShardRange.CREATED)
+ broker.merge_shard_ranges(shard_ranges[:1])
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertFalse(sharder._cleave(broker))
+
+ expected = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'min_time': mock.ANY, 'max_time': mock.ANY}
+ stats = self._assert_stats(expected, sharder, 'cleaved')
+ self.assertIsInstance(stats['min_time'], float)
+ self.assertIsInstance(stats['max_time'], float)
+ self.assertLessEqual(stats['min_time'], stats['max_time'])
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[0], 0)
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ shard_own_sr = shard_broker.get_own_shard_range()
+ self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state)
+ shard_info = shard_broker.get_info()
+ total_shard_stats['object_count'] += shard_info['object_count']
+ total_shard_stats['bytes_used'] += shard_info['bytes_used']
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(4, len(updated_shard_ranges))
+ # update expected state and metadata, check cleaved shard range
+ shard_ranges[0].bytes_used = 20
+ shard_ranges[0].object_count = 2
+ shard_ranges[0].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[0], updated_shard_ranges[0])
+ self._check_objects(objects[:2], expected_shard_dbs[0])
+ # other shard ranges should be unchanged
+ for i in range(1, len(shard_ranges)):
+ with annotate_failure(i):
+ self.assertFalse(os.path.exists(expected_shard_dbs[i]))
+ for i in range(1, len(updated_shard_ranges)):
+ with annotate_failure(i):
+ self.assertEqual(dict(shard_ranges[i]),
+ dict(updated_shard_ranges[i]))
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('here', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(1, context.ranges_done)
+ self.assertEqual(3, context.ranges_todo)
+
+ unlink_files(expected_shard_dbs)
+
+ # move more shard ranges to created state
+ for i in range(1, 4):
+ shard_ranges[i].update_state(ShardRange.CREATED)
+ broker.merge_shard_ranges(shard_ranges[1:4])
+
+ # replication of next shard range is not sufficiently successful
+ with self._mock_sharder(conf=conf) as sharder:
+ quorum = quorum_size(sharder.ring.replica_count)
+ successes = [True] * (quorum - 1)
+ fails = [False] * (sharder.ring.replica_count - len(successes))
+ responses = successes + fails
+ random.shuffle(responses)
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=((False, responses),))
+ self.assertFalse(sharder._cleave(broker))
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[1], 0)
+
+ # cleaving state is unchanged
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(4, len(updated_shard_ranges))
+ for i in range(1, len(updated_shard_ranges)):
+ with annotate_failure(i):
+ self.assertEqual(dict(shard_ranges[i]),
+ dict(updated_shard_ranges[i]))
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('here', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(1, context.ranges_done)
+ self.assertEqual(3, context.ranges_todo)
+
+ # try again, this time replication is sufficiently successful
+ with self._mock_sharder(conf=conf) as sharder:
+ successes = [True] * quorum
+ fails = [False] * (sharder.ring.replica_count - len(successes))
+ responses1 = successes + fails
+ responses2 = fails + successes
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=((False, responses1), (False, responses2)))
+ self.assertFalse(sharder._cleave(broker))
+
+ expected = {'attempted': 2, 'success': 2, 'failure': 0,
+ 'min_time': mock.ANY, 'max_time': mock.ANY}
+ stats = self._assert_stats(expected, sharder, 'cleaved')
+ self.assertIsInstance(stats['min_time'], float)
+ self.assertIsInstance(stats['max_time'], float)
+ self.assertLessEqual(stats['min_time'], stats['max_time'])
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in expected_shard_dbs[1:3]]
+ )
+ for db in expected_shard_dbs[1:3]:
+ shard_broker = ContainerBroker(db)
+ shard_own_sr = shard_broker.get_own_shard_range()
+ self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state)
+ shard_info = shard_broker.get_info()
+ total_shard_stats['object_count'] += shard_info['object_count']
+ total_shard_stats['bytes_used'] += shard_info['bytes_used']
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(4, len(updated_shard_ranges))
+
+ # only 2 are cleaved per batch
+ # update expected state and metadata, check cleaved shard ranges
+ shard_ranges[1].bytes_used = 6
+ shard_ranges[1].object_count = 3
+ shard_ranges[1].state = ShardRange.CLEAVED
+ shard_ranges[2].bytes_used = 100
+ shard_ranges[2].object_count = 1
+ shard_ranges[2].state = ShardRange.CLEAVED
+ for i in range(0, 3):
+ with annotate_failure(i):
+ self._check_shard_range(
+ shard_ranges[i], updated_shard_ranges[i])
+ self._check_objects(objects[2:5], expected_shard_dbs[1])
+ self._check_objects(objects[5:6], expected_shard_dbs[2])
+ # other shard ranges should be unchanged
+ self.assertFalse(os.path.exists(expected_shard_dbs[0]))
+ for i, db in enumerate(expected_shard_dbs[3:], 3):
+ with annotate_failure(i):
+ self.assertFalse(os.path.exists(db))
+ for i, updated_shard_range in enumerate(updated_shard_ranges[3:], 3):
+ with annotate_failure(i):
+ self.assertEqual(dict(shard_ranges[i]),
+ dict(updated_shard_range))
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('where', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(3, context.ranges_done)
+ self.assertEqual(1, context.ranges_todo)
+
+ unlink_files(expected_shard_dbs)
+
+ # run cleave again - should process the fourth range
+ with self._mock_sharder(conf=conf) as sharder:
+ sharder.logger = debug_logger()
+ self.assertFalse(sharder._cleave(broker))
+
+ expected = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'min_time': mock.ANY, 'max_time': mock.ANY}
+ stats = self._assert_stats(expected, sharder, 'cleaved')
+ self.assertIsInstance(stats['min_time'], float)
+ self.assertIsInstance(stats['max_time'], float)
+ self.assertLessEqual(stats['min_time'], stats['max_time'])
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[3], 0)
+ shard_broker = ContainerBroker(expected_shard_dbs[3])
+ shard_own_sr = shard_broker.get_own_shard_range()
+ self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state)
+ shard_info = shard_broker.get_info()
+ total_shard_stats['object_count'] += shard_info['object_count']
+ total_shard_stats['bytes_used'] += shard_info['bytes_used']
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(4, len(updated_shard_ranges))
+
+ shard_ranges[3].bytes_used = 1000
+ shard_ranges[3].object_count = 1
+ shard_ranges[3].state = ShardRange.CLEAVED
+ for i in range(0, 4):
+ with annotate_failure(i):
+ self._check_shard_range(
+ shard_ranges[i], updated_shard_ranges[i])
+ # NB includes the deleted object
+ self._check_objects(objects[6:8], expected_shard_dbs[3])
+ # other shard ranges should be unchanged
+ for i, db in enumerate(expected_shard_dbs[:3]):
+ with annotate_failure(i):
+ self.assertFalse(os.path.exists(db))
+ self.assertFalse(os.path.exists(expected_shard_dbs[4]))
+ for i, updated_shard_range in enumerate(updated_shard_ranges[4:], 4):
+ with annotate_failure(i):
+ self.assertEqual(dict(shard_ranges[i]),
+ dict(updated_shard_range))
+
+ self.assertFalse(os.path.exists(expected_shard_dbs[4]))
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('yonder', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(4, context.ranges_done)
+ self.assertEqual(0, context.ranges_todo)
+
+ unlink_files(expected_shard_dbs)
+
+ # run cleave - should be a no-op, all existing ranges have been cleaved
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertFalse(sharder._cleave(broker))
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_not_called()
+
+ # add final shard range - move this to ACTIVE state and update stats to
+ # simulate another replica having cleaved it and replicated its state
+ shard_ranges[4].update_state(ShardRange.ACTIVE)
+ shard_ranges[4].update_meta(2, 15)
+ broker.merge_shard_ranges(shard_ranges[4:])
+
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertTrue(sharder._cleave(broker))
+
+ expected = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'min_time': mock.ANY, 'max_time': mock.ANY}
+ stats = self._assert_stats(expected, sharder, 'cleaved')
+ self.assertIsInstance(stats['min_time'], float)
+ self.assertIsInstance(stats['max_time'], float)
+ self.assertLessEqual(stats['min_time'], stats['max_time'])
+
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[4], 0)
+ shard_broker = ContainerBroker(expected_shard_dbs[4])
+ shard_own_sr = shard_broker.get_own_shard_range()
+ self.assertEqual(ShardRange.ACTIVE, shard_own_sr.state)
+ shard_info = shard_broker.get_info()
+ total_shard_stats['object_count'] += shard_info['object_count']
+ total_shard_stats['bytes_used'] += shard_info['bytes_used']
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(5, len(updated_shard_ranges))
+ # NB stats of the ACTIVE shard range should not be reset by cleaving
+ for i in range(0, 4):
+ with annotate_failure(i):
+ self._check_shard_range(
+ shard_ranges[i], updated_shard_ranges[i])
+ self.assertEqual(dict(shard_ranges[4]), dict(updated_shard_ranges[4]))
+
+ # object copied to shard
+ self._check_objects(objects[8:], expected_shard_dbs[4])
+ # other shard ranges should be unchanged
+ for i, db in enumerate(expected_shard_dbs[:4]):
+ with annotate_failure(i):
+ self.assertFalse(os.path.exists(db))
+
+ self.assertEqual(initial_root_info['object_count'],
+ total_shard_stats['object_count'])
+ self.assertEqual(initial_root_info['bytes_used'],
+ total_shard_stats['bytes_used'])
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertTrue(context.cleaving_done)
+ self.assertEqual('', context.cursor)
+ self.assertEqual(9, context.cleave_to_row)
+ self.assertEqual(9, context.max_row)
+ self.assertEqual(5, context.ranges_done)
+ self.assertEqual(0, context.ranges_todo)
+
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertTrue(sharder._cleave(broker))
+ sharder._replicate_object.assert_not_called()
+
+ self.assertTrue(broker.set_sharded_state())
+ # run cleave - should be a no-op
+ with self._mock_sharder(conf=conf) as sharder:
+ self.assertTrue(sharder._cleave(broker))
+
+ sharder._replicate_object.assert_not_called()
+
+ def test_cleave_root(self):
+ self._check_cleave_root()
+
+ def test_cleave_root_listing_limit_one(self):
+ # force yield_objects to update its marker and call to the broker's
+ # get_objects() for each shard range, to check the marker moves on
+ self._check_cleave_root(conf={'cleave_row_batch_size': 1})
+
+ def test_cleave_root_ranges_change(self):
+ # verify that objects are not missed if shard ranges change between
+ # cleaving batches
+ broker = self._make_broker()
+ objects = [
+ ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0),
+ ('b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0),
+ ('c', self.ts_encoded(), 1, 'text/plain', 'etag_c', 0, 0),
+ ('d', self.ts_encoded(), 2, 'text/plain', 'etag_d', 0, 0),
+ ('e', self.ts_encoded(), 3, 'text/plain', 'etag_e', 0, 0),
+ ('f', self.ts_encoded(), 100, 'text/plain', 'etag_f', 0, 0),
+ ('x', self.ts_encoded(), 0, '', '', 1, 0), # deleted
+ ('z', self.ts_encoded(), 1000, 'text/plain', 'etag_z', 0, 0)
+ ]
+ for obj in objects:
+ broker.put_object(*obj)
+ broker.enable_sharding(Timestamp.now())
+
+ shard_bounds = (('', 'd'), ('d', 'x'), ('x', ''))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CREATED)
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+
+ broker.merge_shard_ranges(shard_ranges[:3])
+ self.assertTrue(broker.set_sharding_state())
+
+ # run cleave - first batch is cleaved
+ with self._mock_sharder() as sharder:
+ self.assertFalse(sharder._cleave(broker))
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual(str(shard_ranges[1].upper), context.cursor)
+ self.assertEqual(8, context.cleave_to_row)
+ self.assertEqual(8, context.max_row)
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in expected_shard_dbs[:2]]
+ )
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(3, len(updated_shard_ranges))
+
+ # first 2 shard ranges should have updated object count, bytes used and
+ # meta_timestamp
+ shard_ranges[0].bytes_used = 23
+ shard_ranges[0].object_count = 4
+ shard_ranges[0].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[0], updated_shard_ranges[0])
+ shard_ranges[1].bytes_used = 103
+ shard_ranges[1].object_count = 2
+ shard_ranges[1].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[1], updated_shard_ranges[1])
+ self._check_objects(objects[:4], expected_shard_dbs[0])
+ self._check_objects(objects[4:7], expected_shard_dbs[1])
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+
+ # third shard range should be unchanged - not yet cleaved
+ self.assertEqual(dict(shard_ranges[2]),
+ dict(updated_shard_ranges[2]))
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual(str(shard_ranges[1].upper), context.cursor)
+ self.assertEqual(8, context.cleave_to_row)
+ self.assertEqual(8, context.max_row)
+
+ # now change the shard ranges so that third consumes second
+ shard_ranges[1].set_deleted()
+ shard_ranges[2].lower = 'd'
+ shard_ranges[2].timestamp = Timestamp.now()
+
+ broker.merge_shard_ranges(shard_ranges[1:3])
+
+ # run cleave - should process the extended third (final) range
+ with self._mock_sharder() as sharder:
+ self.assertTrue(sharder._cleave(broker))
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[2], 0)
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(2, len(updated_shard_ranges))
+ self._check_shard_range(shard_ranges[0], updated_shard_ranges[0])
+ # third shard range should now have updated object count, bytes used,
+ # including objects previously in the second shard range
+ shard_ranges[2].bytes_used = 1103
+ shard_ranges[2].object_count = 3
+ shard_ranges[2].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[2], updated_shard_ranges[1])
+ self._check_objects(objects[4:8], expected_shard_dbs[2])
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertTrue(context.cleaving_done)
+ self.assertEqual(str(shard_ranges[2].upper), context.cursor)
+ self.assertEqual(8, context.cleave_to_row)
+ self.assertEqual(8, context.max_row)
+
+ def test_cleave_shard(self):
+ broker = self._make_broker(account='.shards_a', container='shard_c')
+ own_shard_range = ShardRange(
+ broker.path, Timestamp.now(), 'here', 'where',
+ state=ShardRange.SHARDING, epoch=Timestamp.now())
+ broker.merge_shard_ranges([own_shard_range])
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ self.assertFalse(broker.is_root_container()) # sanity check
+
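+ # objects 'm' through 'where' fall within this shard's own namespace
+ # ('here', 'where']; the misplaced objects below do not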
+ objects = [
+ ('m', self.ts_encoded(), 1, 'text/plain', 'etag_m', 0, 0),
+ ('n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0),
+ ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0),
+ ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0,
+ 0),
+ ]
+ misplaced_objects = [
+ ('a', self.ts_encoded(), 1, 'text/plain', 'etag_a', 0, 0),
+ ('z', self.ts_encoded(), 100, 'text/plain', 'etag_z', 1, 0),
+ ]
+ for obj in objects + misplaced_objects:
+ broker.put_object(*obj)
+
+ shard_bounds = (('here', 'there'),
+ ('there', 'where'))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CREATED)
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+
+ misplaced_bounds = (('', 'here'),
+ ('where', ''))
+ misplaced_ranges = self._make_shard_ranges(
+ misplaced_bounds, state=ShardRange.ACTIVE)
+ misplaced_dbs = []
+ for shard_range in misplaced_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ misplaced_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+
+ # run cleave - first range is cleaved but move of misplaced objects is
+ # not successful
+ sharder_conf = {'cleave_batch_size': 1}
+ with self._mock_sharder(sharder_conf) as sharder:
+ with mock.patch.object(
+ sharder, '_make_shard_range_fetcher',
+ return_value=lambda: iter(misplaced_ranges)):
+ # cause misplaced objects replication to not succeed
+ quorum = quorum_size(sharder.ring.replica_count)
+ successes = [True] * (quorum - 1)
+ fails = [False] * (sharder.ring.replica_count - len(successes))
+ responses = successes + fails
+ random.shuffle(responses)
+ bad_result = (False, responses)
+ ok_result = (True, [True] * sharder.ring.replica_count)
+ sharder._replicate_object = mock.MagicMock(
+ # result for misplaced, misplaced, cleave
+ side_effect=(bad_result, ok_result, ok_result))
+ self.assertFalse(sharder._cleave(broker))
+
+ context = CleavingContext.load(broker)
+ self.assertFalse(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual(str(shard_ranges[0].upper), context.cursor)
+ self.assertEqual(6, context.cleave_to_row)
+ self.assertEqual(6, context.max_row)
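+ # NB max_row is 6: the retiring db holds the 4 objects plus the 2
+ # misplaced objects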
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, misplaced_dbs[0], 0),
+ mock.call(0, misplaced_dbs[1], 0),
+ mock.call(0, expected_shard_dbs[0], 0)])
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ # NB when cleaving a shard, its own shard range state goes to CLEAVED,
+ # not ACTIVE
+ shard_own_sr = shard_broker.get_own_shard_range()
+ self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state)
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(2, len(updated_shard_ranges))
+
+ # first shard range should have updated object count, bytes used and
+ # meta_timestamp
+ shard_ranges[0].bytes_used = 6
+ shard_ranges[0].object_count = 3
+ shard_ranges[0].state = ShardRange.CLEAVED
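+ # (bytes_used is 6: sizes 1 + 2 + 3 of objects 'm', 'n' and 'there')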
+ self._check_shard_range(shard_ranges[0], updated_shard_ranges[0])
+ self._check_objects(objects[:3], expected_shard_dbs[0])
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self._check_objects(misplaced_objects[:1], misplaced_dbs[0])
+ self._check_objects(misplaced_objects[1:], misplaced_dbs[1])
+ unlink_files(expected_shard_dbs)
+ unlink_files(misplaced_dbs)
+
+ # run cleave - second (final) range is cleaved; move this range to
+ # CLEAVED state and update stats to simulate another replica having
+ # cleaved it and replicated its state
+ shard_ranges[1].update_state(ShardRange.CLEAVED)
+ shard_ranges[1].update_meta(2, 15)
+ broker.merge_shard_ranges(shard_ranges[1:2])
+ with self._mock_sharder(sharder_conf) as sharder:
+ with mock.patch.object(
+ sharder, '_make_shard_range_fetcher',
+ return_value=lambda: iter(misplaced_ranges)):
+ self.assertTrue(sharder._cleave(broker))
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertTrue(context.cleaving_done)
+ self.assertEqual(str(shard_ranges[1].upper), context.cursor)
+ self.assertEqual(6, context.cleave_to_row)
+ self.assertEqual(6, context.max_row)
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, misplaced_dbs[0], 0),
+ mock.call(0, expected_shard_dbs[1], 0)])
+ shard_broker = ContainerBroker(expected_shard_dbs[1])
+ shard_own_sr = shard_broker.get_own_shard_range()
+ self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state)
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(2, len(updated_shard_ranges))
+
+ # second shard range should have updated object count, bytes used and
+ # meta_timestamp
+ self.assertEqual(dict(shard_ranges[1]), dict(updated_shard_ranges[1]))
+ self._check_objects(objects[3:], expected_shard_dbs[1])
+ self.assertFalse(os.path.exists(expected_shard_dbs[0]))
+ self._check_objects(misplaced_objects[:1], misplaced_dbs[0])
+ self.assertFalse(os.path.exists(misplaced_dbs[1]))
+
+ def test_cleave_shard_shrinking(self):
+ broker = self._make_broker(account='.shards_a', container='shard_c')
+ own_shard_range = ShardRange(
+ broker.path, next(self.ts_iter), 'here', 'where',
+ state=ShardRange.SHRINKING, epoch=next(self.ts_iter))
+ broker.merge_shard_ranges([own_shard_range])
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ self.assertFalse(broker.is_root_container()) # sanity check
+
+ objects = [
+ ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0),
+ ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0,
+ 0),
+ ]
+ for obj in objects:
+ broker.put_object(*obj)
+ acceptor_epoch = next(self.ts_iter)
+ acceptor = ShardRange('.shards_a/acceptor', Timestamp.now(),
+ 'here', 'yonder', '1000', '11111',
+ state=ShardRange.ACTIVE, epoch=acceptor_epoch)
+ db_hash = hash_path(acceptor.account, acceptor.container)
+ # NB expected cleave db includes acceptor epoch
+ expected_shard_db = os.path.join(
+ self.tempdir, 'sda', 'containers', '0', db_hash[-3:], db_hash,
+ '%s_%s.db' % (db_hash, acceptor_epoch.internal))
+
+ broker.merge_shard_ranges([acceptor])
+ broker.set_sharding_state()
+
+ # run cleave
+ with self._mock_sharder() as sharder:
+ self.assertTrue(sharder._cleave(broker))
+
+ context = CleavingContext.load(broker)
+ self.assertTrue(context.misplaced_done)
+ self.assertTrue(context.cleaving_done)
+ self.assertEqual(str(acceptor.upper), context.cursor)
+ self.assertEqual(2, context.cleave_to_row)
+ self.assertEqual(2, context.max_row)
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, expected_shard_db, 0)])
+ shard_broker = ContainerBroker(expected_shard_db)
+ # NB when cleaving a shard container into a larger acceptor namespace,
+ # the shard broker's own shard range reflects that of the acceptor
+ # shard range rather than being set to CLEAVED.
+ self.assertEqual(
+ ShardRange.ACTIVE, shard_broker.get_own_shard_range().state)
+
+ updated_shard_ranges = broker.get_shard_ranges()
+ self.assertEqual(1, len(updated_shard_ranges))
+ self.assertEqual(dict(acceptor), dict(updated_shard_ranges[0]))
+
+ # acceptor shard range should be unmodified: object count, bytes used
+ # and meta_timestamp are unchanged
+ self._check_objects(objects, expected_shard_db)
+
+ def test_cleave_repeated(self):
+ # verify that if new objects are merged into the retiring db after
+ # cleaving has started then cleaving is repeated, but only the new
+ # objects are cleaved in the repeated cleaving pass
+ broker = self._make_broker()
+ objects = [
+ ('obj%03d' % i, next(self.ts_iter), 1, 'text/plain', 'etag', 0, 0)
+ for i in range(10)
+ ]
+ new_objects = [
+ (name, next(self.ts_iter), 1, 'text/plain', 'etag', 0, 0)
+ for name in ('alpha', 'zeta')
+ ]
+ for obj in objects:
+ broker.put_object(*obj)
+ broker._commit_puts()
+ broker.enable_sharding(Timestamp.now())
+ shard_bounds = (('', 'obj004'), ('obj004', ''))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CREATED)
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ old_broker = broker.get_brokers()[0]
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+
+ calls = []
+ key = ('name', 'created_at', 'size', 'content_type', 'etag', 'deleted')
+
+ def mock_replicate_object(part, db, node_id):
+ # merge new objects between cleave of first and second shard ranges
+ if not calls:
+ old_broker.merge_items(
+ [dict(zip(key, obj)) for obj in new_objects])
+ calls.append((part, db, node_id))
+ return True, [True, True, True]
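+ # NB merging 'alpha' and 'zeta' into the retiring db during the first
+ # cleave call raises its max row beyond the context's cleave_to_row,
+ # which is what triggers the 'Repeat cleaving required' warning
+ # asserted below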
+
+ with self._mock_sharder() as sharder:
+ sharder._audit_container = mock.MagicMock()
+ sharder._replicate_object = mock_replicate_object
+ sharder._process_broker(broker, node, 99)
+
+ # sanity check - the new objects were merged into the old (retiring)
+ # db, not the fresh db
+ self.assertFalse(broker.get_objects())
+ self.assertEqual(12, len(old_broker.get_objects()))
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ self.assertEqual([(0, expected_shard_dbs[0], 0),
+ (0, expected_shard_dbs[1], 0)], calls)
+
+ # check shard ranges were updated to CLEAVED
+ updated_shard_ranges = broker.get_shard_ranges()
+ # 'alpha' was not in table when first shard was cleaved
+ shard_ranges[0].bytes_used = 5
+ shard_ranges[0].object_count = 5
+ shard_ranges[0].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[0], updated_shard_ranges[0])
+ self._check_objects(objects[:5], expected_shard_dbs[0])
+ # 'zeta' was in table when second shard was cleaved
+ shard_ranges[1].bytes_used = 6
+ shard_ranges[1].object_count = 6
+ shard_ranges[1].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[1], updated_shard_ranges[1])
+ self._check_objects(objects[5:] + new_objects[1:],
+ expected_shard_dbs[1])
+
+ context = CleavingContext.load(broker)
+ self.assertFalse(context.misplaced_done)
+ self.assertFalse(context.cleaving_done)
+ self.assertEqual('', context.cursor)
+ self.assertEqual(10, context.cleave_to_row)
+ self.assertEqual(12, context.max_row) # note that max row increased
+ lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn('Repeat cleaving required', lines[0])
+ self.assertFalse(lines[1:])
+ unlink_files(expected_shard_dbs)
+
+ # repeat the cleaving - the newer objects get cleaved
+ with self._mock_sharder() as sharder:
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+
+ # this time the sharding completed
+ self.assertEqual(SHARDED, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDED,
+ broker.get_own_shard_range().state)
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, expected_shard_dbs[0], 0),
+ mock.call(0, expected_shard_dbs[1], 0)])
+
+ # shard ranges are now ACTIVE - stats not updated by cleaving
+ updated_shard_ranges = broker.get_shard_ranges()
+ shard_ranges[0].state = ShardRange.ACTIVE
+ self._check_shard_range(shard_ranges[0], updated_shard_ranges[0])
+ self._check_objects(new_objects[:1], expected_shard_dbs[0])
+ # both new objects are included in repeat cleaving but no older objects
+ shard_ranges[1].state = ShardRange.ACTIVE
+ self._check_shard_range(shard_ranges[1], updated_shard_ranges[1])
+ self._check_objects(new_objects[1:], expected_shard_dbs[1])
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ def test_cleave_multiple_storage_policies(self):
+ # verify that objects in all storage policies are cleaved
+ broker = self._make_broker()
+ # add objects in multiple policies
+ objects = [{'name': 'obj_%03d' % i,
+ 'created_at': Timestamp.now().normal,
+ 'content_type': 'text/plain',
+ 'etag': 'etag_%d' % i,
+ 'size': 1024 * i,
+ 'deleted': i % 2,
+ 'storage_policy_index': i % 2,
+ } for i in range(1, 8)]
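+ # NB rows alternate between storage policy 0 and 1 and between deleted
+ # and undeleted, so the checks below confirm that cleaving copies rows
+ # for every policy, including deleted rows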
+ # merge_items mutates items
+ broker.merge_items([dict(obj) for obj in objects])
+ broker.enable_sharding(Timestamp.now())
+ shard_ranges = self._make_shard_ranges(
+ (('', 'obj_004'), ('obj_004', '')), state=ShardRange.CREATED)
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+
+ with self._mock_sharder() as sharder:
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+
+ # check shard ranges were updated to ACTIVE
+ self.assertEqual([ShardRange.ACTIVE] * 2,
+ [sr.state for sr in broker.get_shard_ranges()])
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ actual_objects = shard_broker.get_objects()
+ self.assertEqual(objects[:4], actual_objects)
+
+ shard_broker = ContainerBroker(expected_shard_dbs[1])
+ actual_objects = shard_broker.get_objects()
+ self.assertEqual(objects[4:], actual_objects)
+
+ def test_cleave_insufficient_replication(self):
+ # verify that if replication of a cleaved shard range fails then rows
+ # are not merged again to the existing shard db
+ broker = self._make_broker()
+ retiring_db_id = broker.get_info()['id']
+ objects = [
+ {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter),
+ 'size': 1, 'content_type': 'text/plain', 'etag': 'etag',
+ 'deleted': 0, 'storage_policy_index': 0}
+ for i in range(10)
+ ]
+ broker.merge_items([dict(obj) for obj in objects])
+ broker._commit_puts()
+ broker.enable_sharding(Timestamp.now())
+ shard_bounds = (('', 'obj004'), ('obj004', ''))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CREATED)
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ new_object = {'name': 'alpha', 'created_at': next(self.ts_iter),
+ 'size': 0, 'content_type': 'text/plain', 'etag': 'etag',
+ 'deleted': 0, 'storage_policy_index': 0}
+ broker.merge_items([dict(new_object)])
+
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+ orig_merge_items = ContainerBroker.merge_items
+
+ def mock_merge_items(broker, items):
+ merge_items_calls.append((broker.path,
+ # merge mutates item so make a copy
+ [dict(item) for item in items]))
+ orig_merge_items(broker, items)
+
+ # first shard range cleaved but fails to replicate
+ merge_items_calls = []
+ with mock.patch('swift.container.backend.ContainerBroker.merge_items',
+ mock_merge_items):
+ with self._mock_sharder() as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ return_value=(False, [False, False, True]))
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ self._assert_shard_ranges_equal(shard_ranges,
+ broker.get_shard_ranges())
+ # first shard range cleaved to shard broker
+ self.assertEqual([(shard_ranges[0].name, objects[:5])],
+ merge_items_calls)
+ # replication of first shard range fails - no more shards attempted
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[0], 0)
+ # shard broker has sync points
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ self.assertEqual(
+ [{'remote_id': retiring_db_id, 'sync_point': len(objects)}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[:5], shard_broker.get_objects())
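+ # NB the sync point of 10 covers every row in the retiring db, so the
+ # next cleave of this range merges no rows again (see the 'no new
+ # merges required' pass below)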
+
+ # first shard range replicates ok, no new merges required, second is
+ # cleaved but fails to replicate
+ merge_items_calls = []
+ with mock.patch('swift.container.backend.ContainerBroker.merge_items',
+ mock_merge_items), self._mock_sharder() as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [False, True, True]),
+ (False, [False, False, True])])
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+
+ broker_shard_ranges = broker.get_shard_ranges()
+ shard_ranges[0].object_count = 5
+ shard_ranges[0].bytes_used = sum(obj['size'] for obj in objects[:5])
+ shard_ranges[0].state = ShardRange.CLEAVED
+ self._check_shard_range(shard_ranges[0], broker_shard_ranges[0])
+ # second shard range still in created state
+ self._assert_shard_ranges_equal([shard_ranges[1]],
+ [broker_shard_ranges[1]])
+ # only second shard range rows were merged to shard db
+ self.assertEqual([(shard_ranges[1].name, objects[5:])],
+ merge_items_calls)
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, expected_shard_dbs[0], 0),
+ mock.call(0, expected_shard_dbs[1], 0)])
+ # shard broker has sync points
+ shard_broker = ContainerBroker(expected_shard_dbs[1])
+ self.assertEqual(
+ [{'remote_id': retiring_db_id, 'sync_point': len(objects)}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[5:], shard_broker.get_objects())
+
+ # repeat - second shard range cleaves fully because its previously
+ # cleaved shard db no longer exists
+ unlink_files(expected_shard_dbs)
+ merge_items_calls = []
+ with mock.patch('swift.container.backend.ContainerBroker.merge_items',
+ mock_merge_items):
+ with self._mock_sharder() as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(True, [True, True, True]), # misplaced obj
+ (False, [False, True, True])])
+ sharder._audit_container = mock.MagicMock()
+ sharder.logger = debug_logger()
+ sharder._process_broker(broker, node, 99)
+
+ self.assertEqual(SHARDED, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDED,
+ broker.get_own_shard_range().state)
+
+ broker_shard_ranges = broker.get_shard_ranges()
+ shard_ranges[1].object_count = 5
+ shard_ranges[1].bytes_used = sum(obj['size'] for obj in objects[5:])
+ shard_ranges[1].state = ShardRange.ACTIVE
+ self._check_shard_range(shard_ranges[1], broker_shard_ranges[1])
+ # second shard range rows were merged to shard db again
+ self.assertEqual([(shard_ranges[0].name, [new_object]),
+ (shard_ranges[1].name, objects[5:])],
+ merge_items_calls)
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, expected_shard_dbs[0], 0),
+ mock.call(0, expected_shard_dbs[1], 0)])
+ # first shard broker was created by misplaced object - no sync point
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ self.assertFalse(shard_broker.get_syncs())
+ self.assertEqual([new_object], shard_broker.get_objects())
+ # second shard broker has sync points
+ shard_broker = ContainerBroker(expected_shard_dbs[1])
+ self.assertEqual(
+ [{'remote_id': retiring_db_id, 'sync_point': len(objects)}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[5:], shard_broker.get_objects())
+
+ def test_shard_replication_quorum_failures(self):
+ broker = self._make_broker()
+ objects = [
+ {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter),
+ 'size': 1, 'content_type': 'text/plain', 'etag': 'etag',
+ 'deleted': 0, 'storage_policy_index': 0}
+ for i in range(10)
+ ]
+ broker.merge_items([dict(obj) for obj in objects])
+ broker._commit_puts()
+ shard_bounds = (('', 'obj002'), ('obj002', 'obj004'),
+ ('obj004', 'obj006'), ('obj006', ''))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CREATED)
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.enable_sharding(Timestamp.now())
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
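+ # NB in the passes below, shard_replication_quorum applies when
+ # replicating a shard db for a range not yet cleaved elsewhere, while
+ # existing_shard_replication_quorum applies to ranges that another
+ # node has already cleaved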
+ with self._mock_sharder({'shard_replication_quorum': 3}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [False, True, True]),
+ (False, [False, False, True])])
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ # replication of first shard range fails - no more shards attempted
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[0], 0)
+ self.assertEqual([ShardRange.CREATED] * 4,
+ [sr.state for sr in broker.get_shard_ranges()])
+
+ # and again with a chilled out quorum, so cleaving moves on to the
+ # second shard range, which fails to reach even the chilled quorum
+ with self._mock_sharder({'shard_replication_quorum': 1}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [False, False, True]),
+ (False, [False, False, False])])
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ self.assertEqual(sharder._replicate_object.call_args_list, [
+ mock.call(0, expected_shard_dbs[0], 0),
+ mock.call(0, expected_shard_dbs[1], 0),
+ ])
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CREATED, ShardRange.CREATED,
+ ShardRange.CREATED],
+ [sr.state for sr in broker.get_shard_ranges()])
+
+ # now pretend another node successfully cleaved the second shard range,
+ # but this node still fails to replicate so still cannot move on
+ shard_ranges[1].update_state(ShardRange.CLEAVED)
+ broker.merge_shard_ranges(shard_ranges[1])
+ with self._mock_sharder({'shard_replication_quorum': 1}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [False, False, False])])
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[1], 0)
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED,
+ ShardRange.CREATED],
+ [sr.state for sr in broker.get_shard_ranges()])
+
+ # until a super-chilled quorum is used - but even then there must have
+ # been an attempt to replicate
+ with self._mock_sharder(
+ {'shard_replication_quorum': 1,
+ 'existing_shard_replication_quorum': 0}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [])]) # maybe shard db was deleted
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[1], 0)
+ self.assertEqual(
+ [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED,
+ ShardRange.CREATED],
+ [sr.state for sr in broker.get_shard_ranges()])
+
+ # next pass - the second shard replication is attempted and fails, but
+ # that's ok because another node has cleaved it and
+ # existing_shard_replication_quorum is zero
+ with self._mock_sharder(
+ {'shard_replication_quorum': 1,
+ 'existing_shard_replication_quorum': 0}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [False, False, False]),
+ (False, [False, True, False])])
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ self.assertEqual(sharder._replicate_object.call_args_list, [
+ mock.call(0, expected_shard_dbs[1], 0),
+ mock.call(0, expected_shard_dbs[2], 0),
+ ])
+ self.assertEqual([ShardRange.CLEAVED] * 3 + [ShardRange.CREATED],
+ [sr.state for sr in broker.get_shard_ranges()])
+ self.assertEqual(1, sharder.shard_replication_quorum)
+ self.assertEqual(0, sharder.existing_shard_replication_quorum)
+
+ # crazy replication quorums will be capped to replica_count
+ with self._mock_sharder(
+ {'shard_replication_quorum': 99,
+ 'existing_shard_replication_quorum': 99}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(False, [False, True, True])])
+ sharder._audit_container = mock.MagicMock()
+ sharder.logger = debug_logger()
+ sharder._process_broker(broker, node, 99)
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[3], 0)
+ self.assertEqual([ShardRange.CLEAVED] * 3 + [ShardRange.CREATED],
+ [sr.state for sr in broker.get_shard_ranges()])
+ self.assertEqual(3, sharder.shard_replication_quorum)
+ self.assertEqual(3, sharder.existing_shard_replication_quorum)
+
+ # ...and progress is still made if replication fully succeeds
+ with self._mock_sharder(
+ {'shard_replication_quorum': 99,
+ 'existing_shard_replication_quorum': 99}) as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ side_effect=[(True, [True, True, True])])
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ self.assertEqual(SHARDED, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDED,
+ broker.get_own_shard_range().state)
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[3], 0)
+ self.assertEqual([ShardRange.ACTIVE] * 4,
+ [sr.state for sr in broker.get_shard_ranges()])
+ warnings = sharder.logger.get_lines_for_level('warning')
+ self.assertIn(
+ 'shard_replication_quorum of 99 exceeds replica count',
+ warnings[0])
+ self.assertIn(
+ 'existing_shard_replication_quorum of 99 exceeds replica count',
+ warnings[1])
+ self.assertEqual(3, sharder.shard_replication_quorum)
+ self.assertEqual(3, sharder.existing_shard_replication_quorum)
+
+ def test_cleave_to_existing_shard_db(self):
+ # verify that when cleaving to an already existing shard db, sync
+ # points ensure that only new rows are merged into the shard db
+ def replicate(node, from_broker, part):
+ # short circuit replication
+ rpc = replicator.ContainerReplicatorRpc(
+ self.tempdir, DATADIR, ContainerBroker, mount_check=False)
+
+ fake_repl_connection = attach_fake_replication_rpc(rpc)
+ with mock.patch('swift.common.db_replicator.ReplConnection',
+ fake_repl_connection):
+ with mock.patch('swift.common.db_replicator.ring.Ring',
+ lambda *args, **kwargs: FakeRing()):
+ daemon = replicator.ContainerReplicator({})
+ info = from_broker.get_replication_info()
+ success = daemon._repl_to_node(
+ node, from_broker, part, info)
+ self.assertTrue(success)
+
+ orig_merge_items = ContainerBroker.merge_items
+
+ def mock_merge_items(broker, items):
+ # capture merge_items calls
+ merge_items_calls.append((broker.path,
+ # merge mutates item so make a copy
+ [dict(item) for item in items]))
+ orig_merge_items(broker, items)
+
+ objects = [
+ {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter),
+ 'size': 1, 'content_type': 'text/plain', 'etag': 'etag',
+ 'deleted': 0, 'storage_policy_index': 0}
+ for i in range(10)
+ ]
+ # local db gets 4 objects
+ local_broker = self._make_broker()
+ local_broker.merge_items([dict(obj) for obj in objects[2:6]])
+ local_broker._commit_puts()
+ local_retiring_db_id = local_broker.get_info()['id']
+
+ # remote db gets 5 objects
+ remote_broker = self._make_broker(device='sdb')
+ remote_broker.merge_items([dict(obj) for obj in objects[2:7]])
+ remote_broker._commit_puts()
+ remote_retiring_db_id = remote_broker.get_info()['id']
+
+ local_node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda',
+ 'id': '2', 'index': 0, 'replication_ip': '1.2.3.4',
+ 'replication_port': 6040}
+ remote_node = {'ip': '1.2.3.5', 'port': 6040, 'device': 'sdb',
+ 'id': '3', 'index': 1, 'replication_ip': '1.2.3.5',
+ 'replication_port': 6040}
+
+ # remote db replicates to local, bringing local db's total to 5 objects
+ self.assertNotEqual(local_broker.get_objects(),
+ remote_broker.get_objects())
+ replicate(local_node, remote_broker, 0)
+ self.assertEqual(local_broker.get_objects(),
+ remote_broker.get_objects())
+
+ # local db gets 2 new objects, bringing its total to 7
+ local_broker.merge_items([dict(obj) for obj in objects[1:2]])
+ local_broker.merge_items([dict(obj) for obj in objects[7:8]])
+
+ # local db gets shard ranges
+ own_shard_range = local_broker.get_own_shard_range()
+ now = Timestamp.now()
+ own_shard_range.update_state(ShardRange.SHARDING, state_timestamp=now)
+ own_shard_range.epoch = now
+ shard_ranges = self._make_shard_ranges(
+ (('', 'obj004'), ('obj004', '')), state=ShardRange.CREATED)
+ local_broker.merge_shard_ranges([own_shard_range] + shard_ranges)
+ self.assertTrue(local_broker.set_sharding_state())
+
+ # local db shards
+ merge_items_calls = []
+ with mock.patch('swift.container.backend.ContainerBroker.merge_items',
+ mock_merge_items):
+ with self._mock_sharder() as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ return_value=(True, [True, True, True]))
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(local_broker, local_node, 0)
+
+ # all objects merged from local to shard ranges
+ self.assertEqual([(shard_ranges[0].name, objects[1:5]),
+ (shard_ranges[1].name, objects[5:8])],
+ merge_items_calls)
+
+ # shard brokers have sync points
+ expected_shard_dbs = []
+ for shard_range in shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ self.assertEqual(
+ [{'remote_id': local_retiring_db_id, 'sync_point': 7},
+ {'remote_id': remote_retiring_db_id, 'sync_point': 5}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[1:5], shard_broker.get_objects())
+ shard_broker = ContainerBroker(expected_shard_dbs[1])
+ self.assertEqual(
+ [{'remote_id': local_retiring_db_id, 'sync_point': 7},
+ {'remote_id': remote_retiring_db_id, 'sync_point': 5}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[5:8], shard_broker.get_objects())
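+ # NB both shard dbs also hold a sync point of 5 for the remote
+ # retiring db, so when the remote broker is processed below only rows
+ # newer than that sync point are merged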
+
+ # local db replicates to remote, so remote now has shard ranges
+ # note: no objects replicated because local is sharded
+ self.assertFalse(remote_broker.get_shard_ranges())
+ replicate(remote_node, local_broker, 0)
+ self._assert_shard_ranges_equal(local_broker.get_shard_ranges(),
+ remote_broker.get_shard_ranges())
+
+ # remote db gets 3 new objects, bringing its total to 8
+ remote_broker.merge_items([dict(obj) for obj in objects[:1]])
+ remote_broker.merge_items([dict(obj) for obj in objects[8:]])
+
+ merge_items_calls = []
+ with mock.patch('swift.container.backend.ContainerBroker.merge_items',
+ mock_merge_items):
+ with self._mock_sharder() as sharder:
+ sharder._replicate_object = mock.MagicMock(
+ return_value=(True, [True, True, True]))
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(remote_broker, remote_node, 0)
+
+ # shard brokers have sync points for the remote db so only new objects
+ # are merged from remote broker to shard brokers
+ self.assertEqual([(shard_ranges[0].name, objects[:1]),
+ (shard_ranges[1].name, objects[8:])],
+ merge_items_calls)
+ # sync points are updated
+ shard_broker = ContainerBroker(expected_shard_dbs[0])
+ self.assertEqual(
+ [{'remote_id': local_retiring_db_id, 'sync_point': 7},
+ {'remote_id': remote_retiring_db_id, 'sync_point': 8}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[:5], shard_broker.get_objects())
+ shard_broker = ContainerBroker(expected_shard_dbs[1])
+ self.assertEqual(
+ [{'remote_id': local_retiring_db_id, 'sync_point': 7},
+ {'remote_id': remote_retiring_db_id, 'sync_point': 8}],
+ shard_broker.get_syncs())
+ self.assertEqual(objects[5:], shard_broker.get_objects())
+
+ def _check_complete_sharding(self, account, container, shard_bounds):
+ broker = self._make_sharding_broker(
+ account=account, container=container, shard_bounds=shard_bounds)
+ obj = {'name': 'obj', 'created_at': next(self.ts_iter).internal,
+ 'size': 14, 'content_type': 'text/plain', 'etag': 'an etag',
+ 'deleted': 0}
+ broker.get_brokers()[0].merge_items([obj])
+ self.assertEqual(2, len(broker.db_files)) # sanity check
+
+ def check_not_complete():
+ with self._mock_sharder() as sharder:
+ self.assertFalse(sharder._complete_sharding(broker))
+ warning_lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn(
+ 'Repeat cleaving required for %r' % broker.db_files[0],
+ warning_lines[0])
+ self.assertFalse(warning_lines[1:])
+ sharder.logger.clear()
+ context = CleavingContext.load(broker)
+ self.assertFalse(context.cleaving_done)
+ self.assertFalse(context.misplaced_done)
+ self.assertEqual('', context.cursor)
+ self.assertEqual(ShardRange.SHARDING,
+ broker.get_own_shard_range().state)
+ for shard_range in broker.get_shard_ranges():
+ self.assertEqual(ShardRange.CLEAVED, shard_range.state)
+ self.assertEqual(SHARDING, broker.get_db_state())
+
+ # no cleave context progress
+ check_not_complete()
+
+ # cleaving_done is False
+ context = CleavingContext.load(broker)
+ self.assertEqual(1, context.max_row)
+ context.cleave_to_row = 1 # pretend all rows have been cleaved
+ context.cleaving_done = False
+ context.misplaced_done = True
+ context.store(broker)
+ check_not_complete()
+
+ # misplaced_done is False
+ context.misplaced_done = False
+ context.cleaving_done = True
+ context.store(broker)
+ check_not_complete()
+
+ # modified db max row
+ old_broker = broker.get_brokers()[0]
+ obj = {'name': 'obj', 'created_at': next(self.ts_iter).internal,
+ 'size': 14, 'content_type': 'text/plain', 'etag': 'an etag',
+ 'deleted': 1}
+ old_broker.merge_items([obj])
+ self.assertGreater(old_broker.get_max_row(), context.max_row)
+ context.misplaced_done = True
+ context.cleaving_done = True
+ context.store(broker)
+ check_not_complete()
+
+ # db id changes
+ broker.get_brokers()[0].newid('fake_remote_id')
+ context.cleave_to_row = 2 # pretend all rows have been cleaved, again
+ context.store(broker)
+ check_not_complete()
+
+ # context ok
+ context = CleavingContext.load(broker)
+ context.cleave_to_row = context.max_row
+ context.misplaced_done = True
+ context.cleaving_done = True
+ context.store(broker)
+ with self._mock_sharder() as sharder:
+ self.assertTrue(sharder._complete_sharding(broker))
+ self.assertEqual(SHARDED, broker.get_db_state())
+ self.assertEqual(ShardRange.SHARDED,
+ broker.get_own_shard_range().state)
+ for shard_range in broker.get_shard_ranges():
+ self.assertEqual(ShardRange.ACTIVE, shard_range.state)
+ warning_lines = sharder.logger.get_lines_for_level('warning')
+ self.assertFalse(warning_lines)
+ sharder.logger.clear()
+ return broker
+
+ def test_complete_sharding_root(self):
+ broker = self._check_complete_sharding(
+ 'a', 'c', (('', 'mid'), ('mid', '')))
+ self.assertEqual(0, broker.get_own_shard_range().deleted)
+
+ def test_complete_sharding_shard(self):
+ broker = self._check_complete_sharding(
+ '.shards_', 'shard_c', (('l', 'mid'), ('mid', 'u')))
+ self.assertEqual(1, broker.get_own_shard_range().deleted)
+
+ def test_identify_sharding_candidate(self):
+ brokers = [self._make_broker(container='c%03d' % i) for i in range(6)]
+ for broker in brokers:
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ node = {'index': 2}
+ # containers are all empty
+ with self._mock_sharder() as sharder:
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ expected_stats = {}
+ self._assert_stats(expected_stats, sharder, 'sharding_candidates')
+
+ objects = [
+ ['obj%3d' % i, next(self.ts_iter).internal, i, 'text/plain',
+ 'etag%s' % i, 0] for i in range(160)]
+
+ # one container has 100 objects, which is below the sharding threshold
+ for obj in objects[:100]:
+ brokers[0].put_object(*obj)
+ conf = {'recon_cache_path': self.tempdir}
+ with self._mock_sharder(conf=conf) as sharder:
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ self.assertFalse(sharder.sharding_candidates)
+ expected_recon = {
+ 'found': 0,
+ 'top': []}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ # reduce the sharding threshold and the container is reported
+ conf = {'shard_container_threshold': 100,
+ 'recon_cache_path': self.tempdir}
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now() as now:
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ stats_0 = {'path': brokers[0].db_file,
+ 'node_index': 2,
+ 'account': 'a',
+ 'container': 'c000',
+ 'root': 'a/c',
+ 'object_count': 100,
+ 'meta_timestamp': now.internal,
+ 'file_size': os.stat(brokers[0].db_file).st_size}
+ self.assertEqual([stats_0], sharder.sharding_candidates)
+ expected_recon = {
+ 'found': 1,
+ 'top': [stats_0]}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ # repeat with handoff node and db_file error
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock.patch('os.stat', side_effect=OSError('test error')):
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, {})
+ stats_0_b = {'path': brokers[0].db_file,
+ 'node_index': None,
+ 'account': 'a',
+ 'container': 'c000',
+ 'root': 'a/c',
+ 'object_count': 100,
+ 'meta_timestamp': now.internal,
+ 'file_size': None}
+ self.assertEqual([stats_0_b], sharder.sharding_candidates)
+ self._assert_stats(expected_stats, sharder, 'sharding_candidates')
+ expected_recon = {
+ 'found': 1,
+ 'top': [stats_0_b]}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ # load up another container, but not to the sharding threshold, and
+ # verify it is not a candidate for sharding in any state of its own
+ # shard range
+ for obj in objects[:50]:
+ brokers[2].put_object(*obj)
+ own_sr = brokers[2].get_own_shard_range()
+ for state in ShardRange.STATES:
+ own_sr.update_state(state, state_timestamp=Timestamp.now())
+ brokers[2].merge_shard_ranges([own_sr])
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ with annotate_failure(state):
+ self.assertEqual([stats_0], sharder.sharding_candidates)
+
+ # reduce the threshold and the second container is included
+ conf = {'shard_container_threshold': 50,
+ 'recon_cache_path': self.tempdir}
+ own_sr.update_state(ShardRange.ACTIVE, state_timestamp=Timestamp.now())
+ brokers[2].merge_shard_ranges([own_sr])
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ stats_2 = {'path': brokers[2].db_file,
+ 'node_index': 2,
+ 'account': 'a',
+ 'container': 'c002',
+ 'root': 'a/c',
+ 'object_count': 50,
+ 'meta_timestamp': now.internal,
+ 'file_size': os.stat(brokers[2].db_file).st_size}
+ self.assertEqual([stats_0, stats_2], sharder.sharding_candidates)
+ expected_recon = {
+ 'found': 2,
+ 'top': [stats_0, stats_2]}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ # a broker not in active state is not included
+ own_sr = brokers[0].get_own_shard_range()
+ for state in ShardRange.STATES:
+ if state == ShardRange.ACTIVE:
+ continue
+ own_sr.update_state(state, state_timestamp=Timestamp.now())
+ brokers[0].merge_shard_ranges([own_sr])
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ with annotate_failure(state):
+ self.assertEqual([stats_2], sharder.sharding_candidates)
+
+ own_sr.update_state(ShardRange.ACTIVE, state_timestamp=Timestamp.now())
+ brokers[0].merge_shard_ranges([own_sr])
+
+ # load up a third container with 150 objects
+ for obj in objects[:150]:
+ brokers[5].put_object(*obj)
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ stats_5 = {'path': brokers[5].db_file,
+ 'node_index': 2,
+ 'account': 'a',
+ 'container': 'c005',
+ 'root': 'a/c',
+ 'object_count': 150,
+ 'meta_timestamp': now.internal,
+ 'file_size': os.stat(brokers[5].db_file).st_size}
+ self.assertEqual([stats_0, stats_2, stats_5],
+ sharder.sharding_candidates)
+ # note recon top list is sorted by size
+ expected_recon = {
+ 'found': 3,
+ 'top': [stats_5, stats_0, stats_2]}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ # restrict the number of reported candidates
+ conf = {'shard_container_threshold': 50,
+ 'recon_cache_path': self.tempdir,
+ 'recon_candidates_limit': 2}
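+ # NB the limit only caps the recon 'top' list; all candidates are
+ # still collected in sharder.sharding_candidates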
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+ self.assertEqual([stats_0, stats_2, stats_5],
+ sharder.sharding_candidates)
+ expected_recon = {
+ 'found': 3,
+ 'top': [stats_5, stats_0]}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ # remove the limit on the number of reported candidates
+ conf = {'shard_container_threshold': 50,
+ 'recon_cache_path': self.tempdir,
+ 'recon_candidates_limit': -1}
+ for i, broker in enumerate([brokers[1]] + brokers[3:5]):
+ for obj in objects[:(151 + i)]:
+ broker.put_object(*obj)
+ with self._mock_sharder(conf=conf) as sharder:
+ with mock_timestamp_now(now):
+ for broker in brokers:
+ sharder._identify_sharding_candidate(broker, node)
+
+ stats_4 = {'path': brokers[4].db_file,
+ 'node_index': 2,
+ 'account': 'a',
+ 'container': 'c004',
+ 'root': 'a/c',
+ 'object_count': 153,
+ 'meta_timestamp': now.internal,
+ 'file_size': os.stat(brokers[4].db_file).st_size}
+ stats_3 = {'path': brokers[3].db_file,
+ 'node_index': 2,
+ 'account': 'a',
+ 'container': 'c003',
+ 'root': 'a/c',
+ 'object_count': 152,
+ 'meta_timestamp': now.internal,
+ 'file_size': os.stat(brokers[3].db_file).st_size}
+ stats_1 = {'path': brokers[1].db_file,
+ 'node_index': 2,
+ 'account': 'a',
+ 'container': 'c001',
+ 'root': 'a/c',
+ 'object_count': 151,
+ 'meta_timestamp': now.internal,
+ 'file_size': os.stat(brokers[1].db_file).st_size}
+
+ self.assertEqual(
+ [stats_0, stats_1, stats_2, stats_3, stats_4, stats_5],
+ sharder.sharding_candidates)
+ self._assert_stats(expected_stats, sharder, 'sharding_candidates')
+ expected_recon = {
+ 'found': 6,
+ 'top': [stats_4, stats_3, stats_1, stats_5, stats_0, stats_2]}
+ sharder._report_stats()
+ self._assert_recon_stats(
+ expected_recon, sharder, 'sharding_candidates')
+
+ def test_misplaced_objects_root_container(self):
+ broker = self._make_broker()
+ broker.enable_sharding(next(self.ts_iter))
+
+ objects = [
+ # misplaced objects in second, third and fourth shard ranges
+ ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0],
+ ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 1],
+ ['where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0,
+ 0],
+ # deleted
+ ['x', self.ts_encoded(), 0, '', '', 1, 1],
+ ]
+
+ shard_bounds = (('', 'here'), ('here', 'there'),
+ ('there', 'where'), ('where', 'yonder'),
+ ('yonder', ''))
+ initial_shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.ACTIVE)
+ expected_shard_dbs = []
+ for shard_range in initial_shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.merge_shard_ranges(initial_shard_ranges)
+
+ # unsharded
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+ sharder._replicate_object.assert_not_called()
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 0, 'placed': 0, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+
+ # sharding - no misplaced objects
+ self.assertTrue(broker.set_sharding_state())
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+ sharder._replicate_object.assert_not_called()
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+
+ # pretend we cleaved up to end of second shard range
+ context = CleavingContext.load(broker)
+ context.cursor = 'there'
+ context.store(broker)
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+ sharder._replicate_object.assert_not_called()
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+
+ # sharding - misplaced objects
+ for obj in objects:
+ broker.put_object(*obj)
+ # pretend we have not cleaved any ranges
+ context.cursor = ''
+ context.store(broker)
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+ sharder._replicate_object.assert_not_called()
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+ self.assertFalse(os.path.exists(expected_shard_dbs[0]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[3]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[4]))
+
+ # pretend we cleaved up to end of second shard range
+ context.cursor = 'there'
+ context.store(broker)
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ sharder._replicate_object.assert_called_once_with(
+ 0, expected_shard_dbs[1], 0)
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_shard_dbs[1])
+ # ... and removed from the source db
+ self._check_objects(objects[2:], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_shard_dbs[0]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[3]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[4]))
+
+ # pretend we cleaved up to end of fourth shard range
+ context.cursor = 'yonder'
+ context.store(broker)
+ # and some new misplaced updates arrived in the first shard range
+ new_objects = [
+ ['b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0],
+ ['c', self.ts_encoded(), 20, 'text/plain', 'etag_c', 0, 0],
+ ]
+ for obj in new_objects:
+ broker.put_object(*obj)
+
+ # check that *all* misplaced objects are moved despite exceeding
+ # the listing limit
+ with self._mock_sharder(conf={'cleave_row_batch_size': 2}) as sharder:
+ sharder._move_misplaced_objects(broker)
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in expected_shard_dbs[2:4]],
+ any_order=True
+ )
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+
+ # check misplaced objects were moved
+ self._check_objects(new_objects, expected_shard_dbs[0])
+ self._check_objects(objects[:2], expected_shard_dbs[1])
+ self._check_objects(objects[2:3], expected_shard_dbs[2])
+ self._check_objects(objects[3:], expected_shard_dbs[3])
+ # ... and removed from the source db
+ self._check_objects([], broker.db_file)
+ self.assertFalse(os.path.exists(expected_shard_dbs[4]))
+
+ # pretend we cleaved all ranges - sharded state
+ self.assertTrue(broker.set_sharded_state())
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+ sharder._replicate_object.assert_not_called()
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 0, 'placed': 0, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+
+ # and then more misplaced updates arrive
+ newer_objects = [
+ ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0],
+ ['z', self.ts_encoded(), 52, 'text/plain', 'etag_z', 0, 0],
+ ]
+ for obj in newer_objects:
+ broker.put_object(*obj)
+ broker.get_info() # force updates to be committed
+ # sanity check the puts landed in sharded broker
+ self._check_objects(newer_objects, broker.db_file)
+
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0)
+ for db in (expected_shard_dbs[0], expected_shard_dbs[-1])],
+ any_order=True
+ )
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+
+ # check new misplaced objects were moved
+ self._check_objects(newer_objects[:1] + new_objects,
+ expected_shard_dbs[0])
+ self._check_objects(newer_objects[1:], expected_shard_dbs[4])
+ # ... and removed from the source db
+ self._check_objects([], broker.db_file)
+ # ... and other shard dbs were unchanged
+ self._check_objects(objects[:2], expected_shard_dbs[1])
+ self._check_objects(objects[2:3], expected_shard_dbs[2])
+ self._check_objects(objects[3:], expected_shard_dbs[3])
+
+ def _setup_misplaced_objects(self):
+ # make a broker with shard ranges, move it to sharded state and then
+ # put some misplaced objects in it
+ broker = self._make_broker()
+ shard_bounds = (('', 'here'), ('here', 'there'),
+ ('there', 'where'), ('where', 'yonder'),
+ ('yonder', ''))
+ initial_shard_ranges = [
+ ShardRange('.shards_a/%s-%s' % (lower, upper),
+ Timestamp.now(), lower, upper, state=ShardRange.ACTIVE)
+ for lower, upper in shard_bounds
+ ]
+ expected_dbs = []
+ for shard_range in initial_shard_ranges:
+ db_hash = hash_path(shard_range.account, shard_range.container)
+ expected_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.merge_shard_ranges(initial_shard_ranges)
+ objects = [
+ # misplaced objects in second, third and fourth shard ranges
+ ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0],
+ ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0],
+ ['where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0,
+ 0],
+ # deleted
+ ['x', self.ts_encoded(), 0, '', '', 1, 0],
+ ]
+ broker.enable_sharding(Timestamp.now())
+ self.assertTrue(broker.set_sharding_state())
+ self.assertTrue(broker.set_sharded_state())
+ for obj in objects:
+ broker.put_object(*obj)
+ self.assertEqual(SHARDED, broker.get_db_state())
+ return broker, objects, expected_dbs
+
+ def test_misplaced_objects_newer_objects(self):
+ # verify that objects merged to the db after misplaced objects have
+ # been identified are not removed from the db
+ broker, objects, expected_dbs = self._setup_misplaced_objects()
+ newer_objects = [
+ ['j', self.ts_encoded(), 51, 'text/plain', 'etag_j', 0, 0],
+ ['k', self.ts_encoded(), 52, 'text/plain', 'etag_k', 1, 0],
+ ]
+
+ calls = []
+ pre_removal_objects = []
+
+ def mock_replicate_object(part, db, node_id):
+ calls.append((part, db, node_id))
+ if db == expected_dbs[1]:
+ # put some new objects in the shard range that is being
+ # replicated before misplaced objects are removed from that
+ # range in the source db
+ for obj in newer_objects:
+ broker.put_object(*obj)
+ # grab a snapshot of the db contents - a side effect is
+ # that the newer objects are now committed to the db
+ pre_removal_objects.extend(
+ broker.get_objects())
+ return True, [True, True, True]
+
+ with self._mock_sharder(replicas=3) as sharder:
+ sharder._replicate_object = mock_replicate_object
+ sharder._move_misplaced_objects(broker)
+
+ # sanity check - the newer objects were in the db before the misplaced
+ # objects were removed
+ for obj in newer_objects:
+ self.assertIn(obj[0], [o['name'] for o in pre_removal_objects])
+ for obj in objects[:2]:
+ self.assertIn(obj[0], [o['name'] for o in pre_removal_objects])
+
+ self.assertEqual(
+ set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls))
+
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_dbs[1])
+ self._check_objects(objects[2:3], expected_dbs[2])
+ self._check_objects(objects[3:], expected_dbs[3])
+ # ... but newer objects were not removed from the source db
+ self._check_objects(newer_objects, broker.db_file)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+
+ # they will be moved on next cycle
+ unlink_files(expected_dbs)
+ with self._mock_sharder(replicas=3) as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ self._check_objects(newer_objects, expected_dbs[1])
+ self._check_objects([], broker.db_file)
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+
+ def test_misplaced_objects_db_id_changed(self):
+ broker, objects, expected_dbs = self._setup_misplaced_objects()
+
+ pre_info = broker.get_info()
+ calls = []
+ expected_retained_objects = []
+ expected_retained_objects_dbs = []
+
+ def mock_replicate_object(part, db, node_id):
+ calls.append((part, db, node_id))
+ if len(calls) == 2:
+ broker.newid('fake_remote_id')
+ # grab snapshot of the objects in the broker when it changed id
+ expected_retained_objects.extend(
+ self._get_raw_object_records(broker))
+ if len(calls) >= 2:
+ expected_retained_objects_dbs.append(db)
+ return True, [True, True, True]
+
+ with self._mock_sharder(replicas=3) as sharder:
+ sharder._replicate_object = mock_replicate_object
+ sharder._move_misplaced_objects(broker)
+
+ # sanity checks
+ self.assertNotEqual(pre_info['id'], broker.get_info()['id'])
+ self.assertTrue(expected_retained_objects)
+
+ self.assertEqual(
+ set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls))
+
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_dbs[1])
+ self._check_objects(objects[2:3], expected_dbs[2])
+ self._check_objects(objects[3:], expected_dbs[3])
+ # ... but objects were not removed after the source db id changed
+ self._check_objects(expected_retained_objects, broker.db_file)
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'found': 1, 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+
+ lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn('Refused to remove misplaced objects', lines[0])
+ self.assertIn('Refused to remove misplaced objects', lines[1])
+ self.assertFalse(lines[2:])
+
+ # they will be moved again on next cycle
+ unlink_files(expected_dbs)
+ sharder.logger.clear()
+ with self._mock_sharder(replicas=3) as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ self.assertEqual(2, len(set(expected_retained_objects_dbs)))
+ for db in expected_retained_objects_dbs:
+ if db == expected_dbs[1]:
+ self._check_objects(objects[:2], expected_dbs[1])
+ if db == expected_dbs[2]:
+ self._check_objects(objects[2:3], expected_dbs[2])
+ if db == expected_dbs[3]:
+ self._check_objects(objects[3:], expected_dbs[3])
+ self._check_objects([], broker.db_file)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': len(expected_retained_objects),
+ 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+
+ def test_misplaced_objects_sufficient_replication(self):
+ broker, objects, expected_dbs = self._setup_misplaced_objects()
+
+ with self._mock_sharder(replicas=3) as sharder:
+ sharder._replicate_object.return_value = (True, [True, True, True])
+ sharder._move_misplaced_objects(broker)
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in (expected_dbs[2:4])],
+ any_order=True)
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_dbs[1])
+ self._check_objects(objects[2:3], expected_dbs[2])
+ self._check_objects(objects[3:], expected_dbs[3])
+ # ... and removed from the source db
+ self._check_objects([], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_dbs[0]))
+ self.assertFalse(os.path.exists(expected_dbs[4]))
+
+ def test_misplaced_objects_insufficient_replication_3_replicas(self):
+ broker, objects, expected_dbs = self._setup_misplaced_objects()
+
+ returns = {expected_dbs[1]: (True, [True, True, True]), # ok
+ expected_dbs[2]: (False, [True, False, False]), # < quorum
+ expected_dbs[3]: (False, [False, True, True])} # ok
+ calls = []
+
+ def mock_replicate_object(part, db, node_id):
+ calls.append((part, db, node_id))
+ return returns[db]
+
+ with self._mock_sharder(replicas=3) as sharder:
+ sharder._replicate_object = mock_replicate_object
+ sharder._move_misplaced_objects(broker)
+
+ self.assertEqual(
+ set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls))
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ # check misplaced objects were moved to shard dbs
+ self._check_objects(objects[:2], expected_dbs[1])
+ self._check_objects(objects[2:3], expected_dbs[2])
+ self._check_objects(objects[3:], expected_dbs[3])
+ # ... but only removed from the source db if sufficiently replicated
+ self._check_objects(objects[2:3], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_dbs[0]))
+ self.assertFalse(os.path.exists(expected_dbs[4]))
+
+ def test_misplaced_objects_insufficient_replication_2_replicas(self):
+ broker, objects, expected_dbs = self._setup_misplaced_objects()
+
+ returns = {expected_dbs[1]: (True, [True, True]), # ok
+ expected_dbs[2]: (False, [True, False]), # ok
+ expected_dbs[3]: (False, [False, False])} # < quorum
+ calls = []
+
+ def mock_replicate_object(part, db, node_id):
+ calls.append((part, db, node_id))
+ return returns[db]
+
+ with self._mock_sharder(replicas=2) as sharder:
+ sharder._replicate_object = mock_replicate_object
+ sharder._move_misplaced_objects(broker)
+
+ self.assertEqual(
+ set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls))
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ # check misplaced objects were moved to shard dbs
+ self._check_objects(objects[:2], expected_dbs[1])
+ self._check_objects(objects[2:3], expected_dbs[2])
+ self._check_objects(objects[3:], expected_dbs[3])
+ # ... but only removed from the source db if sufficiently replicated
+ self._check_objects(objects[3:], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_dbs[0]))
+ self.assertFalse(os.path.exists(expected_dbs[4]))
+
+ def test_misplaced_objects_insufficient_replication_4_replicas(self):
+ broker, objects, expected_dbs = self._setup_misplaced_objects()
+
+ returns = {expected_dbs[1]: (False, [True, False, False, False]), # < quorum
+ expected_dbs[2]: (True, [True, False, False, True]), # ok
+ expected_dbs[3]: (False, [False, False, False, False])} # < quorum
+ calls = []
+
+ def mock_replicate_object(part, db, node_id):
+ calls.append((part, db, node_id))
+ return returns[db]
+
+ with self._mock_sharder(replicas=4) as sharder:
+ sharder._replicate_object = mock_replicate_object
+ sharder._move_misplaced_objects(broker)
+
+ self.assertEqual(
+ set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls))
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'placed': 4, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ # check misplaced objects were moved to shard dbs
+ self._check_objects(objects[:2], expected_dbs[1])
+ self._check_objects(objects[2:3], expected_dbs[2])
+ self._check_objects(objects[3:], expected_dbs[3])
+ # ... but only removed from the source db if sufficiently replicated
+ self._check_objects(objects[:2] + objects[3:], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_dbs[0]))
+ self.assertFalse(os.path.exists(expected_dbs[4]))
+
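+ # The following scenario exercises a shard container that is still
+ # UNSHARDED: destinations for misplaced rows are looked up via
+ # _fetch_shard_ranges against the root (only 'updating' ranges are
+ # requested), and rows with no available destination are counted as
+ # 'unplaced' and reported with a warning.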
+ def _check_misplaced_objects_shard_container_unsharded(self, conf=None):
+ broker = self._make_broker(account='.shards_a', container='.shard_c')
+ ts_shard = next(self.ts_iter)
+ own_sr = ShardRange(broker.path, ts_shard, 'here', 'where')
+ broker.merge_shard_ranges([own_sr])
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ self.assertEqual(own_sr, broker.get_own_shard_range()) # sanity check
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+
+ objects = [
+ # some of these are misplaced objects
+ ['b', self.ts_encoded(), 2, 'text/plain', 'etag_b', 0, 0],
+ ['here', self.ts_encoded(), 2, 'text/plain', 'etag_here', 0, 0],
+ ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0],
+ ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0],
+ ['x', self.ts_encoded(), 0, '', '', 1, 0], # deleted
+ ['y', self.ts_encoded(), 10, 'text/plain', 'etag_y', 0, 0],
+ ]
+
+ shard_bounds = (('', 'here'), ('here', 'there'),
+ ('there', 'where'), ('where', ''))
+ root_shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.ACTIVE)
+ expected_shard_dbs = []
+ for sr in root_shard_ranges:
+ db_hash = hash_path(sr.account, sr.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+
+ # no objects
+ with self._mock_sharder(conf=conf) as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges)
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_not_called()
+
+ sharder._replicate_object.assert_not_called()
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 0, 'placed': 0, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ # now put objects
+ for obj in objects:
+ broker.put_object(*obj)
+ self._check_objects(objects, broker.db_file) # sanity check
+
+ # NB final shard range not available
+ with self._mock_sharder(conf=conf) as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges[:-1])
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_has_calls(
+ [mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': '',
+ 'end_marker': 'here\x00'}),
+ mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': 'where',
+ 'end_marker': ''})])
+ sharder._replicate_object.assert_called_with(
+ 0, expected_shard_dbs[0], 0)
+
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'found': 1, 'placed': 2, 'unplaced': 2}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ # some misplaced objects could not be moved...
+ warning_lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn(
+ 'Failed to find destination for at least 2 misplaced objects',
+ warning_lines[0])
+ self.assertFalse(warning_lines[1:])
+ sharder.logger.clear()
+
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_shard_dbs[0])
+ # ... and removed from the source db
+ self._check_objects(objects[2:], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[3]))
+
+ # repeat with final shard range available
+ with self._mock_sharder(conf=conf) as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges)
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_has_calls(
+ [mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': 'where',
+ 'end_marker': ''})])
+
+ sharder._replicate_object.assert_called_with(
+ 0, expected_shard_dbs[-1], 0)
+
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_shard_dbs[0])
+ self._check_objects(objects[4:], expected_shard_dbs[3])
+ # ... and removed from the source db
+ self._check_objects(objects[2:4], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+
+ # repeat - no work remaining
+ with self._mock_sharder(conf=conf) as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges)
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_not_called()
+ sharder._replicate_object.assert_not_called()
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 0, 'placed': 0, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertFalse(
+ sharder.logger.get_increment_counts().get('misplaced_found'))
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ # and then more misplaced updates arrive
+ new_objects = [
+ ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0],
+ ['z', self.ts_encoded(), 52, 'text/plain', 'etag_z', 0, 0],
+ ]
+ for obj in new_objects:
+ broker.put_object(*obj)
+ # sanity check the puts landed in the source broker
+ self._check_objects(new_objects[:1] + objects[2:4] + new_objects[1:],
+ broker.db_file)
+
+ with self._mock_sharder(conf=conf) as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges)
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_has_calls(
+ [mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': '',
+ 'end_marker': 'here\x00'}),
+ mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': 'where',
+ 'end_marker': ''})])
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0)
+ for db in (expected_shard_dbs[0], expected_shard_dbs[3])],
+ any_order=True
+ )
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ # check new misplaced objects were moved
+ self._check_objects(new_objects[:1] + objects[:2],
+ expected_shard_dbs[0])
+ self._check_objects(objects[4:] + new_objects[1:],
+ expected_shard_dbs[3])
+ # ... and removed from the source db
+ self._check_objects(objects[2:4], broker.db_file)
+ # ... and nothing else moved
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+
+ def test_misplaced_objects_shard_container_unsharded(self):
+ self._check_misplaced_objects_shard_container_unsharded()
+
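+ # The batch-size variants below re-run the same scenario with
+ # cleave_row_batch_size of 2 and then 1; the outcome should not depend
+ # on how many misplaced rows are yielded per batch.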
+ def test_misplaced_objects_shard_container_unsharded_limit_two(self):
+ self._check_misplaced_objects_shard_container_unsharded(
+ conf={'cleave_row_batch_size': 2})
+
+ def test_misplaced_objects_shard_container_unsharded_limit_one(self):
+ self._check_misplaced_objects_shard_container_unsharded(
+ conf={'cleave_row_batch_size': 1})
+
+ def test_misplaced_objects_shard_container_sharding(self):
+ broker = self._make_broker(account='.shards_a', container='shard_c')
+ ts_shard = next(self.ts_iter)
+ # note that own_sr spans two root shard ranges
+ own_sr = ShardRange(broker.path, ts_shard, 'here', 'where')
+ own_sr.update_state(ShardRange.SHARDING)
+ own_sr.epoch = next(self.ts_iter)
+ broker.merge_shard_ranges([own_sr])
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ self.assertEqual(own_sr, broker.get_own_shard_range()) # sanity check
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+
+ objects = [
+ # some of these are misplaced objects
+ ['b', self.ts_encoded(), 2, 'text/plain', 'etag_b', 0, 0],
+ ['here', self.ts_encoded(), 2, 'text/plain', 'etag_here', 0, 0],
+ ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0],
+ ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0],
+ ['v', self.ts_encoded(), 10, 'text/plain', 'etag_v', 0, 0],
+ ['y', self.ts_encoded(), 10, 'text/plain', 'etag_y', 0, 0],
+ ]
+
+ shard_bounds = (('', 'here'), ('here', 'there'),
+ ('there', 'where'), ('where', ''))
+ root_shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.ACTIVE)
+ expected_shard_dbs = []
+ for sr in root_shard_ranges:
+ db_hash = hash_path(sr.account, sr.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+
+ # pretend broker is sharding but not yet cleaved a shard
+ self.assertTrue(broker.set_sharding_state())
+ broker.merge_shard_ranges([dict(sr) for sr in root_shard_ranges[1:3]])
+ # then some updates arrive
+ for obj in objects:
+ broker.put_object(*obj)
+ broker.get_info()
+ self._check_objects(objects, broker.db_file) # sanity check
+
+ # first destination is not available
+ with self._mock_sharder() as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges[1:])
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_has_calls(
+ [mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': '',
+ 'end_marker': 'here\x00'}),
+ mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': 'where',
+ 'end_marker': ''})])
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, expected_shard_dbs[-1], 0)],
+ )
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'found': 1, 'placed': 1, 'unplaced': 2}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ warning_lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn(
+ 'Failed to find destination for at least 2 misplaced objects',
+ warning_lines[0])
+ self.assertFalse(warning_lines[1:])
+ sharder.logger.clear()
+
+ # check some misplaced objects were moved
+ self._check_objects(objects[5:], expected_shard_dbs[3])
+ # ... and removed from the source db
+ self._check_objects(objects[:5], broker.db_file)
+ self.assertFalse(os.path.exists(expected_shard_dbs[0]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+
+ # normality resumes and all destinations are available
+ with self._mock_sharder() as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges)
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_has_calls(
+ [mock.call(broker, newest=True, params={'states': 'updating',
+ 'marker': '',
+ 'end_marker': 'here\x00'})]
+ )
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, expected_shard_dbs[0], 0)],
+ )
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ # check misplaced objects were moved
+ self._check_objects(objects[:2], expected_shard_dbs[0])
+ self._check_objects(objects[5:], expected_shard_dbs[3])
+ # ... and removed from the source db
+ self._check_objects(objects[2:5], broker.db_file)
+ self.assertFalse(os.path.exists(expected_shard_dbs[1]))
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+
+ # pretend first shard has been cleaved
+ context = CleavingContext.load(broker)
+ context.cursor = 'there'
+ context.store(broker)
+ # and then more misplaced updates arrive
+ new_objects = [
+ ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0],
+ # this one is in the now cleaved shard range...
+ ['k', self.ts_encoded(), 52, 'text/plain', 'etag_k', 0, 0],
+ ['z', self.ts_encoded(), 53, 'text/plain', 'etag_z', 0, 0],
+ ]
+ for obj in new_objects:
+ broker.put_object(*obj)
+ broker.get_info() # force updates to be committed
+ # sanity check the puts landed in the source broker
+ self._check_objects(sorted(new_objects + objects[2:5]), broker.db_file)
+ with self._mock_sharder() as sharder:
+ sharder._fetch_shard_ranges = mock.MagicMock(
+ return_value=root_shard_ranges)
+ sharder._move_misplaced_objects(broker)
+
+ sharder._fetch_shard_ranges.assert_has_calls(
+ [mock.call(broker, newest=True,
+ params={'states': 'updating', 'marker': '',
+ 'end_marker': 'there\x00'}),
+ mock.call(broker, newest=True,
+ params={'states': 'updating', 'marker': 'where',
+ 'end_marker': ''})])
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in (expected_shard_dbs[0],
+ expected_shard_dbs[1],
+ expected_shard_dbs[-1])],
+ any_order=True
+ )
+
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 5, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+
+ # check *all* the misplaced objects were moved
+ self._check_objects(new_objects[:1] + objects[:2],
+ expected_shard_dbs[0])
+ self._check_objects(new_objects[1:2] + objects[2:4],
+ expected_shard_dbs[1])
+ self._check_objects(objects[5:] + new_objects[2:],
+ expected_shard_dbs[3])
+ # ... and removed from the source db
+ self._check_objects(objects[4:5], broker.db_file)
+ self.assertFalse(os.path.exists(expected_shard_dbs[2]))
+
+ def test_misplaced_objects_deleted_and_updated(self):
+ # setup
+ broker = self._make_broker()
+ broker.enable_sharding(next(self.ts_iter))
+
+ shard_bounds = (('', 'here'), ('here', ''))
+ root_shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.ACTIVE)
+ expected_shard_dbs = []
+ for sr in root_shard_ranges:
+ db_hash = hash_path(sr.account, sr.container)
+ expected_shard_dbs.append(
+ os.path.join(self.tempdir, 'sda', 'containers', '0',
+ db_hash[-3:], db_hash, db_hash + '.db'))
+ broker.merge_shard_ranges(root_shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+
+ ts_older_internal = self.ts_encoded() # used later
+ # put deleted objects into source
+ objects = [
+ ['b', self.ts_encoded(), 0, '', '', 1, 0],
+ ['x', self.ts_encoded(), 0, '', '', 1, 0]
+ ]
+ for obj in objects:
+ broker.put_object(*obj)
+ broker.get_info()
+ self._check_objects(objects, broker.db_file) # sanity check
+ # pretend we cleaved all ranges - sharded state
+ self.assertTrue(broker.set_sharded_state())
+
+ with self._mock_sharder() as sharder:
+ sharder.logger = debug_logger()
+ sharder._move_misplaced_objects(broker)
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in (expected_shard_dbs[0],
+ expected_shard_dbs[1])],
+ any_order=True
+ )
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'placed': 2, 'unplaced': 0}
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+
+ # check new misplaced objects were moved
+ self._check_objects(objects[:1], expected_shard_dbs[0])
+ self._check_objects(objects[1:], expected_shard_dbs[1])
+ # ... and removed from the source db
+ self._check_objects([], broker.db_file)
+
+ # update source db with older undeleted versions of same objects
+ old_objects = [
+ ['b', ts_older_internal, 2, 'text/plain', 'etag_b', 0, 0],
+ ['x', ts_older_internal, 4, 'text/plain', 'etag_x', 0, 0]
+ ]
+ for obj in old_objects:
+ broker.put_object(*obj)
+ broker.get_info()
+ self._check_objects(old_objects, broker.db_file) # sanity check
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in (expected_shard_dbs[0],
+ expected_shard_dbs[1])],
+ any_order=True
+ )
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+
+ # check older misplaced objects were not merged to shard brokers
+ self._check_objects(objects[:1], expected_shard_dbs[0])
+ self._check_objects(objects[1:], expected_shard_dbs[1])
+ # ... and removed from the source db
+ self._check_objects([], broker.db_file)
+
+ # the destination shard dbs for misplaced objects may already exist so
+ # check they are updated correctly when overwriting objects
+ # update source db with newer deleted versions of same objects
+ new_objects = [
+ ['b', self.ts_encoded(), 0, '', '', 1, 0],
+ ['x', self.ts_encoded(), 0, '', '', 1, 0]
+ ]
+ for obj in new_objects:
+ broker.put_object(*obj)
+ broker.get_info()
+ self._check_objects(new_objects, broker.db_file) # sanity check
+ shard_broker = ContainerBroker(
+ expected_shard_dbs[0], account=root_shard_ranges[0].account,
+ container=root_shard_ranges[0].container)
+ # update one shard container with even newer version of object
+ timestamps = [next(self.ts_iter) for i in range(7)]
+ ts_newer = encode_timestamps(
+ timestamps[1], timestamps[3], timestamps[5])
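+ # encode_timestamps packs separate data, content-type and metadata
+ # timestamps into a single encoded value; the merge assertions below
+ # rely on each component being resolved independently, with the newest
+ # timestamp (and its associated value) winning for each component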
+ newer_object = ('b', ts_newer, 10, 'text/plain', 'etag_b', 0, 0)
+ shard_broker.put_object(*newer_object)
+
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ sharder._replicate_object.assert_has_calls(
+ [mock.call(0, db, 0) for db in (expected_shard_dbs[0],
+ expected_shard_dbs[1])],
+ any_order=True
+ )
+ self._assert_stats(expected_stats, sharder, 'misplaced')
+ self.assertEqual(
+ 1, sharder.logger.get_increment_counts()['misplaced_found'])
+
+ # check only the newer misplaced object was moved
+ self._check_objects([newer_object], expected_shard_dbs[0])
+ self._check_objects(new_objects[1:], expected_shard_dbs[1])
+ # ... and removed from the source db
+ self._check_objects([], broker.db_file)
+
+ # update source with a version of 'b' that has newer data
+ # but older content-type and metadata relative to shard object
+ ts_update = encode_timestamps(
+ timestamps[2], timestamps[3], timestamps[4])
+ update_object = ('b', ts_update, 20, 'text/ignored', 'etag_newer', 0,
+ 0)
+ broker.put_object(*update_object)
+
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ ts_expected = encode_timestamps(
+ timestamps[2], timestamps[3], timestamps[5])
+ expected = ('b', ts_expected, 20, 'text/plain', 'etag_newer', 0, 0)
+ self._check_objects([expected], expected_shard_dbs[0])
+ self._check_objects([], broker.db_file)
+
+ # update source with a version of 'b' that has older data
+ # and content-type but newer metadata relative to shard object
+ ts_update = encode_timestamps(
+ timestamps[1], timestamps[3], timestamps[6])
+ update_object = ('b', ts_update, 999, 'text/ignored', 'etag_b', 0, 0)
+ broker.put_object(*update_object)
+
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ ts_expected = encode_timestamps(
+ timestamps[2], timestamps[3], timestamps[6])
+ expected = ('b', ts_expected, 20, 'text/plain', 'etag_newer', 0, 0)
+ self._check_objects([expected], expected_shard_dbs[0])
+ self._check_objects([], broker.db_file)
+
+ # update source with a version of 'b' that has older data
+ # but newer content-type and metadata
+ ts_update = encode_timestamps(
+ timestamps[2], timestamps[6], timestamps[6])
+ update_object = ('b', ts_update, 999, 'text/newer', 'etag_b', 0, 0)
+ broker.put_object(*update_object)
+
+ with self._mock_sharder() as sharder:
+ sharder._move_misplaced_objects(broker)
+
+ ts_expected = encode_timestamps(
+ timestamps[2], timestamps[6], timestamps[6])
+ expected = ('b', ts_expected, 20, 'text/newer', 'etag_newer', 0, 0)
+ self._check_objects([expected], expected_shard_dbs[0])
+ self._check_objects([], broker.db_file)
+
+ def _setup_find_ranges(self, account, cont, lower, upper):
+ broker = self._make_broker(account=account, container=cont)
+ own_sr = ShardRange('%s/%s' % (account, cont), Timestamp.now(),
+ lower, upper)
+ broker.merge_shard_ranges([own_sr])
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ objects = [
+ # enough objects for the scanner to count and split into ranges
+ ['obj%3d' % i, self.ts_encoded(), i, 'text/plain', 'etag%s' % i, 0]
+ for i in range(100)]
+ for obj in objects:
+ broker.put_object(*obj)
+ return broker, objects
+
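+ # In the scanning checks below the assertions are consistent with
+ # split_size being derived as shard_container_threshold // 2 (a
+ # threshold of 200 gives a split_size of 100, 199 gives 99, 90 gives
+ # 45), so with the default threshold the 100 objects created in
+ # _setup_find_ranges are too few for any ranges to be found.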
+ def _check_find_shard_ranges_none_found(self, broker, objects):
+ with self._mock_sharder() as sharder:
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertGreater(sharder.split_size, len(objects))
+ self.assertEqual(0, num_found)
+ self.assertFalse(broker.get_shard_ranges())
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'found': 0, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ with self._mock_sharder(
+ conf={'shard_container_threshold': 200}) as sharder:
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertEqual(sharder.split_size, len(objects))
+ self.assertEqual(0, num_found)
+ self.assertFalse(broker.get_shard_ranges())
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
+ 'found': 0, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ def test_find_shard_ranges_none_found_root(self):
+ broker, objects = self._setup_find_ranges('a', 'c', '', '')
+ self._check_find_shard_ranges_none_found(broker, objects)
+
+ def test_find_shard_ranges_none_found_shard(self):
+ broker, objects = self._setup_find_ranges(
+ '.shards_a', 'c', 'lower', 'upper')
+ self._check_find_shard_ranges_none_found(broker, objects)
+
+ def _check_find_shard_ranges_finds_two(self, account, cont, lower, upper):
+ def check_ranges():
+ self.assertEqual(2, len(broker.get_shard_ranges()))
+ expected_ranges = [
+ ShardRange(
+ ShardRange.make_path('.int_shards_a', 'c', cont, now, 0),
+ now, lower, objects[98][0], 99),
+ ShardRange(
+ ShardRange.make_path('.int_shards_a', 'c', cont, now, 1),
+ now, objects[98][0], upper, 1),
+ ]
+ self._assert_shard_ranges_equal(expected_ranges,
+ broker.get_shard_ranges())
+
+ # first invocation finds both ranges
+ broker, objects = self._setup_find_ranges(
+ account, cont, lower, upper)
+ with self._mock_sharder(conf={'shard_container_threshold': 199,
+ 'auto_create_account_prefix': '.int_'}
+ ) as sharder:
+ with mock_timestamp_now() as now:
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertEqual(99, sharder.split_size)
+ self.assertEqual(2, num_found)
+ check_ranges()
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 2, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ # second invocation finds none
+ with self._mock_sharder(conf={'shard_container_threshold': 199,
+ 'auto_create_account_prefix': '.int_'}
+ ) as sharder:
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertEqual(0, num_found)
+ self.assertEqual(2, len(broker.get_shard_ranges()))
+ check_ranges()
+ expected_stats = {'attempted': 0, 'success': 0, 'failure': 0,
+ 'found': 0, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ def test_find_shard_ranges_finds_two_root(self):
+ self._check_find_shard_ranges_finds_two('a', 'c', '', '')
+
+ def test_find_shard_ranges_finds_two_shard(self):
+ self._check_find_shard_ranges_finds_two('.shards_a', 'c_', 'l', 'u')
+
+ def _check_find_shard_ranges_finds_three(self, account, cont, lower,
+ upper):
+ broker, objects = self._setup_find_ranges(
+ account, cont, lower, upper)
+ now = Timestamp.now()
+ expected_ranges = [
+ ShardRange(
+ ShardRange.make_path('.shards_a', 'c', cont, now, 0),
+ now, lower, objects[44][0], 45),
+ ShardRange(
+ ShardRange.make_path('.shards_a', 'c', cont, now, 1),
+ now, objects[44][0], objects[89][0], 45),
+ ShardRange(
+ ShardRange.make_path('.shards_a', 'c', cont, now, 2),
+ now, objects[89][0], upper, 10),
+ ]
+ # first invocation finds 2 ranges
+ with self._mock_sharder(
+ conf={'shard_container_threshold': 90,
+ 'shard_scanner_batch_size': 2}) as sharder:
+ with mock_timestamp_now(now):
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertEqual(45, sharder.split_size)
+ self.assertEqual(2, num_found)
+ self.assertEqual(2, len(broker.get_shard_ranges()))
+ self._assert_shard_ranges_equal(expected_ranges[:2],
+ broker.get_shard_ranges())
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 2, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ # second invocation finds third shard range
+ with self._mock_sharder(conf={'shard_container_threshold': 199,
+ 'shard_scanner_batch_size': 2}
+ ) as sharder:
+ with mock_timestamp_now(now):
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertEqual(1, num_found)
+ self.assertEqual(3, len(broker.get_shard_ranges()))
+ self._assert_shard_ranges_equal(expected_ranges,
+ broker.get_shard_ranges())
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
+ 'found': 1, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ # third invocation finds none
+ with self._mock_sharder(conf={'shard_container_threshold': 199,
+ 'shard_scanner_batch_size': 2}
+ ) as sharder:
+ sharder._send_shard_ranges = mock.MagicMock(return_value=True)
+ num_found = sharder._find_shard_ranges(broker)
+ self.assertEqual(0, num_found)
+ self.assertEqual(3, len(broker.get_shard_ranges()))
+ self._assert_shard_ranges_equal(expected_ranges,
+ broker.get_shard_ranges())
+ expected_stats = {'attempted': 0, 'success': 0, 'failure': 0,
+ 'found': 0, 'min_time': mock.ANY,
+ 'max_time': mock.ANY}
+ stats = self._assert_stats(expected_stats, sharder, 'scanned')
+ self.assertGreaterEqual(stats['max_time'], stats['min_time'])
+
+ def test_find_shard_ranges_finds_three_root(self):
+ self._check_find_shard_ranges_finds_three('a', 'c', '', '')
+
+ def test_find_shard_ranges_finds_three_shard(self):
+ self._check_find_shard_ranges_finds_three('.shards_a', 'c_', 'l', 'u')
+
+ def test_sharding_enabled(self):
+ broker = self._make_broker()
+ self.assertFalse(sharding_enabled(broker))
+ broker.update_metadata(
+ {'X-Container-Sysmeta-Sharding':
+ ('yes', Timestamp.now().internal)})
+ self.assertTrue(sharding_enabled(broker))
+ # deleting broker clears sharding sysmeta
+ broker.delete_db(Timestamp.now().internal)
+ self.assertFalse(sharding_enabled(broker))
+ # but if broker has a shard range then sharding is enabled
+ broker.merge_shard_ranges(
+ ShardRange('acc/a_shard', Timestamp.now(), 'l', 'u'))
+ self.assertTrue(sharding_enabled(broker))
+
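+ # _send_shard_ranges PUTs the json-serialized shard ranges to every
+ # replica of the given container. The cases below are consistent with
+ # the result being treated as success while at least half of the
+ # requests succeed (2 of 3, 1 of 2, 2 of 4); non-2xx responses are
+ # logged as warnings and exceptions/timeouts as errors.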
+ def test_send_shard_ranges(self):
+ shard_ranges = self._make_shard_ranges((('', 'h'), ('h', '')))
+
+ def do_test(replicas, *resp_codes):
+ sent_data = defaultdict(str)
+
+ def on_send(fake_conn, data):
+ sent_data[fake_conn] += data
+
+ with self._mock_sharder(replicas=replicas) as sharder:
+ with mocked_http_conn(*resp_codes, give_send=on_send) as conn:
+ with mock_timestamp_now() as now:
+ res = sharder._send_shard_ranges(
+ 'a', 'c', shard_ranges)
+
+ self.assertEqual(sharder.ring.replica_count, len(conn.requests))
+ expected_body = json.dumps([dict(sr) for sr in shard_ranges])
+ expected_headers = {'Content-Type': 'application/json',
+ 'Content-Length': str(len(expected_body)),
+ 'X-Timestamp': now.internal,
+ 'X-Backend-Record-Type': 'shard',
+ 'User-Agent': mock.ANY}
+ for data in sent_data.values():
+ self.assertEqual(expected_body, data)
+ hosts = set()
+ for req in conn.requests:
+ path_parts = req['path'].split('/')[1:]
+ hosts.add('%s:%s/%s' % (req['ip'], req['port'], path_parts[0]))
+ # FakeRing only has one partition
+ self.assertEqual('0', path_parts[1])
+ self.assertEqual('PUT', req['method'])
+ self.assertEqual(['a', 'c'], path_parts[-2:])
+ req_headers = req['headers']
+ for k, v in expected_headers.items():
+ self.assertEqual(v, req_headers[k])
+ self.assertTrue(
+ req_headers['User-Agent'].startswith('container-sharder'))
+ self.assertEqual(sharder.ring.replica_count, len(hosts))
+ return res, sharder
+
+ replicas = 3
+ res, sharder = do_test(replicas, 202, 202, 202)
+ self.assertTrue(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 202, 202, 404)
+ self.assertTrue(res)
+ self.assertEqual([True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 202, 202, Exception)
+ self.assertTrue(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertEqual([True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+ res, sharder = do_test(replicas, 202, 404, 404)
+ self.assertFalse(res)
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 500, 500, 500)
+ self.assertFalse(res)
+ self.assertEqual([True, True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, Exception, Exception, 202)
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+ res, sharder = do_test(replicas, Exception, eventlet.Timeout(), 202)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+
+ replicas = 2
+ res, sharder = do_test(replicas, 202, 202)
+ self.assertTrue(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 202, 404)
+ self.assertTrue(res)
+ self.assertEqual([True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 202, Exception)
+ self.assertTrue(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertEqual([True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+ res, sharder = do_test(replicas, 404, 404)
+ self.assertFalse(res)
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, Exception, Exception)
+ self.assertFalse(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+ res, sharder = do_test(replicas, eventlet.Timeout(), Exception)
+ self.assertFalse(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+
+ replicas = 4
+ res, sharder = do_test(replicas, 202, 202, 202, 202)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self.assertTrue(res)
+ res, sharder = do_test(replicas, 202, 202, 404, 404)
+ self.assertTrue(res)
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 202, 202, Exception, Exception)
+ self.assertTrue(res)
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+ res, sharder = do_test(replicas, 202, 404, 404, 404)
+ self.assertFalse(res)
+ self.assertEqual([True, True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, 500, 500, 500, 202)
+ self.assertFalse(res)
+ self.assertEqual([True, True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('warning')])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ res, sharder = do_test(replicas, Exception, Exception, 202, 404)
+ self.assertFalse(res)
+ self.assertEqual([True], [
+ all(msg in line for msg in ('Failed to put shard ranges', '404'))
+ for line in sharder.logger.get_lines_for_level('warning')])
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+ res, sharder = do_test(
+ replicas, eventlet.Timeout(), eventlet.Timeout(), 202, 404)
+ self.assertFalse(res)
+ self.assertEqual([True], [
+ all(msg in line for msg in ('Failed to put shard ranges', '404'))
+ for line in sharder.logger.get_lines_for_level('warning')])
+ self.assertEqual([True, True], [
+ 'Failed to put shard ranges' in line for line in
+ sharder.logger.get_lines_for_level('error')])
+
+ def test_process_broker_not_sharding_no_others(self):
+ # verify that the sharding process will not start when the own shard
+ # range is missing, in the wrong state, or there are no other shard ranges
+ broker = self._make_broker()
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+ # sanity check
+ self.assertIsNone(broker.get_own_shard_range(no_default=True))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+
+ # no own shard range
+ with self._mock_sharder() as sharder:
+ sharder._process_broker(broker, node, 99)
+ self.assertIsNone(broker.get_own_shard_range(no_default=True))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+ broker.logger.clear()
+
+ # now add own shard range
+ for state in sorted(ShardRange.STATES):
+ own_sr = broker.get_own_shard_range() # returns the default
+ own_sr.update_state(state)
+ broker.merge_shard_ranges([own_sr])
+ with mock.patch.object(
+ broker, 'set_sharding_state') as mock_set_sharding_state:
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ with mock.patch.object(sharder, '_audit_container'):
+ sharder.logger = debug_logger()
+ sharder._process_broker(broker, node, 99)
+ own_shard_range = broker.get_own_shard_range(
+ no_default=True)
+ mock_set_sharding_state.assert_not_called()
+ self.assertEqual(dict(own_sr, meta_timestamp=now),
+ dict(own_shard_range))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+ broker.logger.clear()
+
+ def _check_process_broker_sharding_no_others(self, state):
+ # verify that when the existing own_shard_range is in the given state
+ # and there are other shard ranges, the sharding process will begin
+ broker = self._make_broker(hash_='hash%s' % state)
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+ own_sr = broker.get_own_shard_range()
+ self.assertTrue(own_sr.update_state(state))
+ epoch = Timestamp.now()
+ own_sr.epoch = epoch
+ shard_ranges = self._make_shard_ranges((('', 'm'), ('m', '')))
+ broker.merge_shard_ranges([own_sr] + shard_ranges)
+
+ with self._mock_sharder() as sharder:
+ with mock.patch.object(
+ sharder, '_create_shard_containers', return_value=0):
+ with mock_timestamp_now() as now:
+ sharder._audit_container = mock.MagicMock()
+ sharder._process_broker(broker, node, 99)
+ final_own_sr = broker.get_own_shard_range(no_default=True)
+
+ self.assertEqual(dict(own_sr, meta_timestamp=now),
+ dict(final_own_sr))
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1])
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+
+ def test_process_broker_sharding_with_own_shard_range_no_others(self):
+ self._check_process_broker_sharding_no_others(ShardRange.SHARDING)
+ self._check_process_broker_sharding_no_others(ShardRange.SHRINKING)
+
+ def test_process_broker_not_sharding_others(self):
+ # verify that the sharding process will not start when the own shard
+ # range is missing or in the wrong state, even when other shard ranges
+ # are in the db
+ broker = self._make_broker()
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+ # sanity check
+ self.assertIsNone(broker.get_own_shard_range(no_default=True))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+
+ # add shard ranges - but not own
+ shard_ranges = self._make_shard_ranges((('', 'h'), ('h', '')))
+ broker.merge_shard_ranges(shard_ranges)
+
+ with self._mock_sharder() as sharder:
+ sharder._process_broker(broker, node, 99)
+ self.assertIsNone(broker.get_own_shard_range(no_default=True))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+ broker.logger.clear()
+
+ # now add own shard range
+ for state in sorted(ShardRange.STATES):
+ if state in (ShardRange.SHARDING,
+ ShardRange.SHRINKING,
+ ShardRange.SHARDED):
+ epoch = None
+ else:
+ epoch = Timestamp.now()
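+ # NB: sharding states without an epoch should only log a
+ # 'missing epoch' warning and leave the db unsharded; the epoch is
+ # used to name the fresh db once sharding does start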
+
+ own_sr = broker.get_own_shard_range() # returns the default
+ own_sr.update_state(state)
+ own_sr.epoch = epoch
+ broker.merge_shard_ranges([own_sr])
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._process_broker(broker, node, 99)
+ own_shard_range = broker.get_own_shard_range(
+ no_default=True)
+ self.assertEqual(dict(own_sr, meta_timestamp=now),
+ dict(own_shard_range))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+ if epoch:
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ else:
+ self.assertIn('missing epoch',
+ broker.logger.get_lines_for_level('warning')[0])
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+ broker.logger.clear()
+
+ def _check_process_broker_sharding_others(self, state):
+ # verify the states in which own_shard_range will cause the sharding
+ # process to start when other shard ranges are in the db
+ broker = self._make_broker(hash_='hash%s' % state)
+ node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2',
+ 'index': 0}
+ # add shard ranges - but not own
+ shard_ranges = self._make_shard_ranges((('', 'h'), ('h', '')))
+ broker.merge_shard_ranges(shard_ranges)
+ # sanity check
+ self.assertIsNone(broker.get_own_shard_range(no_default=True))
+ self.assertEqual(UNSHARDED, broker.get_db_state())
+
+ # now set own shard range to given state and persist it
+ own_sr = broker.get_own_shard_range() # returns the default
+ self.assertTrue(own_sr.update_state(state))
+ epoch = Timestamp.now()
+ own_sr.epoch = epoch
+ broker.merge_shard_ranges([own_sr])
+ with self._mock_sharder() as sharder:
+
+ sharder.logger = debug_logger()
+ with mock_timestamp_now() as now:
+ # we're not testing the rest of the process here, so prevent any
+ # attempt to progress shard range states
+ sharder._create_shard_containers = lambda *args: 0
+ sharder._process_broker(broker, node, 99)
+ own_shard_range = broker.get_own_shard_range(no_default=True)
+
+ self.assertEqual(dict(own_sr, meta_timestamp=now),
+ dict(own_shard_range))
+ self.assertEqual(SHARDING, broker.get_db_state())
+ self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1])
+ self.assertFalse(broker.logger.get_lines_for_level('warning'))
+ self.assertFalse(broker.logger.get_lines_for_level('error'))
+
+ def test_process_broker_sharding_with_own_shard_range_and_others(self):
+ self._check_process_broker_sharding_others(ShardRange.SHARDING)
+ self._check_process_broker_sharding_others(ShardRange.SHRINKING)
+ self._check_process_broker_sharding_others(ShardRange.SHARDED)
+
+ def check_shard_ranges_sent(self, broker, expected_sent):
+ bodies = []
+
+ def capture_send(conn, data):
+ bodies.append(data)
+
+ with self._mock_sharder() as sharder:
+ with mocked_http_conn(204, 204, 204,
+ give_send=capture_send) as mock_conn:
+ sharder._update_root_container(broker)
+
+ for req in mock_conn.requests:
+ self.assertEqual('PUT', req['method'])
+ self.assertEqual([expected_sent] * 3,
+ [json.loads(b) for b in bodies])
+
+ def test_update_root_container_own_range(self):
+ broker = self._make_broker()
+
+ # nothing to send
+ with self._mock_sharder() as sharder:
+ with mocked_http_conn() as mock_conn:
+ sharder._update_root_container(broker)
+ self.assertFalse(mock_conn.requests)
+
+ def check_only_own_shard_range_sent(state):
+ own_shard_range = broker.get_own_shard_range()
+ self.assertTrue(own_shard_range.update_state(
+ state, state_timestamp=next(self.ts_iter)))
+ broker.merge_shard_ranges([own_shard_range])
+ # add an object, expect to see it reflected in the own shard range
+ # that is sent
+ broker.put_object(str(own_shard_range.object_count + 1),
+ next(self.ts_iter).internal, 1, '', '')
+ with mock_timestamp_now() as now:
+ # force own shard range meta updates to be at fixed timestamp
+ expected_sent = [
+ dict(own_shard_range,
+ meta_timestamp=now.internal,
+ object_count=own_shard_range.object_count + 1,
+ bytes_used=own_shard_range.bytes_used + 1)]
+ self.check_shard_ranges_sent(broker, expected_sent)
+
+ for state in ShardRange.STATES:
+ with annotate_failure(state):
+ check_only_own_shard_range_sent(state)
+
+ def test_update_root_container_all_ranges(self):
+ broker = self._make_broker()
+ other_shard_ranges = self._make_shard_ranges((('', 'h'), ('h', '')))
+ self.assertTrue(other_shard_ranges[0].set_deleted())
+ broker.merge_shard_ranges(other_shard_ranges)
+
+ # own range missing - send nothing
+ with self._mock_sharder() as sharder:
+ with mocked_http_conn() as mock_conn:
+ sharder._update_root_container(broker)
+ self.assertFalse(mock_conn.requests)
+
+ def check_all_shard_ranges_sent(state):
+ own_shard_range = broker.get_own_shard_range()
+ self.assertTrue(own_shard_range.update_state(
+ state, state_timestamp=next(self.ts_iter)))
+ broker.merge_shard_ranges([own_shard_range])
+ # add an object, expect to see it reflected in the own shard range
+ # that is sent
+ broker.put_object(str(own_shard_range.object_count + 1),
+ next(self.ts_iter).internal, 1, '', '')
+ with mock_timestamp_now() as now:
+ shard_ranges = broker.get_shard_ranges(include_deleted=True)
+ expected_sent = sorted([
+ own_shard_range.copy(
+ meta_timestamp=now.internal,
+ object_count=own_shard_range.object_count + 1,
+ bytes_used=own_shard_range.bytes_used + 1)] +
+ shard_ranges,
+ key=lambda sr: (sr.upper, sr.state, sr.lower))
+ self.check_shard_ranges_sent(
+ broker, [dict(sr) for sr in expected_sent])
+
+ for state in ShardRange.STATES.keys():
+ with annotate_failure(state):
+ check_all_shard_ranges_sent(state)
+
+ def test_audit_root_container(self):
+ broker = self._make_broker()
+
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0}
+ with self._mock_sharder() as sharder:
+ with mock.patch.object(
+ sharder, '_audit_shard_container') as mocked:
+ sharder._audit_container(broker)
+ self._assert_stats(expected_stats, sharder, 'audit_root')
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ mocked.assert_not_called()
+
+ def assert_overlap_warning(line, state_text):
+ self.assertIn(
+ 'Audit failed for root %s' % broker.db_file, line)
+ self.assertIn(
+ 'overlapping ranges in state %s: k-t s-z' % state_text,
+ line)
+
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1}
+ shard_bounds = (('a', 'j'), ('k', 't'), ('s', 'z'))
+ for state, state_text in ShardRange.STATES.items():
+ shard_ranges = self._make_shard_ranges(shard_bounds, state)
+ broker.merge_shard_ranges(shard_ranges)
+ with self._mock_sharder() as sharder:
+ with mock.patch.object(
+ sharder, '_audit_shard_container') as mocked:
+ sharder._audit_container(broker)
+ lines = sharder.logger.get_lines_for_level('warning')
+ assert_overlap_warning(lines[0], state_text)
+ self.assertFalse(lines[1:])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self._assert_stats(expected_stats, sharder, 'audit_root')
+ mocked.assert_not_called()
+
+ def assert_missing_warning(line):
+ self.assertIn(
+ 'Audit failed for root %s' % broker.db_file, line)
+ self.assertIn('missing range(s): -a j-k z-', line)
+
+ own_shard_range = broker.get_own_shard_range()
+ states = (ShardRange.SHARDING, ShardRange.SHARDED)
+ for state in states:
+ own_shard_range.update_state(
+ state, state_timestamp=next(self.ts_iter))
+ broker.merge_shard_ranges([own_shard_range])
+ with self._mock_sharder() as sharder:
+ with mock.patch.object(
+ sharder, '_audit_shard_container') as mocked:
+ sharder._audit_container(broker)
+ lines = sharder.logger.get_lines_for_level('warning')
+ assert_missing_warning(lines[0])
+ assert_overlap_warning(lines[0], state_text)
+ self.assertFalse(lines[1:])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self._assert_stats(expected_stats, sharder, 'audit_root')
+ mocked.assert_not_called()
+
+ def test_audit_shard_container(self):
+ broker = self._make_broker(account='.shards_a', container='shard_c')
+ broker.set_sharding_sysmeta('Root', 'a/c')
+ # include overlaps to verify correct match for updating own shard range
+ shard_bounds = (
+ ('a', 'j'), ('k', 't'), ('k', 's'), ('l', 's'), ('s', 'z'))
+ shard_ranges = self._make_shard_ranges(shard_bounds, ShardRange.ACTIVE)
+ shard_ranges[1].name = broker.path
+ expected_stats = {'attempted': 1, 'success': 0, 'failure': 1}
+
+ def call_audit_container(exc=None):
+ with self._mock_sharder() as sharder:
+ sharder.logger = debug_logger()
+ with mock.patch.object(sharder, '_audit_root_container') \
+ as mocked, mock.patch.object(
+ sharder, 'int_client') as mock_swift:
+ mock_response = mock.MagicMock()
+ mock_response.headers = {'x-backend-record-type':
+ 'shard'}
+ mock_response.body = json.dumps(
+ [dict(sr) for sr in shard_ranges])
+ mock_swift.make_request.return_value = mock_response
+ mock_swift.make_request.side_effect = exc
+ mock_swift.make_path = (lambda a, c:
+ '/v1/%s/%s' % (a, c))
+ sharder.reclaim_age = 0
+ sharder._audit_container(broker)
+ mocked.assert_not_called()
+ return sharder, mock_swift
+
+ # bad account name
+ broker.account = 'bad_account'
+ sharder, mock_swift = call_audit_container()
+ lines = sharder.logger.get_lines_for_level('warning')
+ self._assert_stats(expected_stats, sharder, 'audit_shard')
+ self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[0])
+ self.assertIn('account not in shards namespace', lines[0])
+ self.assertNotIn('root has no matching shard range', lines[0])
+ self.assertNotIn('unable to get shard ranges from root', lines[0])
+ self.assertIn('Audit failed for shard %s' % broker.db_file, lines[1])
+ self.assertIn('missing own shard range', lines[1])
+ self.assertFalse(lines[2:])
+ self.assertFalse(broker.is_deleted())
+
+ # missing own shard range
+ broker.get_info()
+ sharder, mock_swift = call_audit_container()
+ lines = sharder.logger.get_lines_for_level('warning')
+ self._assert_stats(expected_stats, sharder, 'audit_shard')
+ self.assertIn('Audit failed for shard %s' % broker.db_file, lines[0])
+ self.assertIn('missing own shard range', lines[0])
+ self.assertNotIn('unable to get shard ranges from root', lines[0])
+ self.assertFalse(lines[1:])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self.assertFalse(broker.is_deleted())
+
+ # create own shard range, no match in root
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0}
+ own_shard_range = broker.get_own_shard_range() # get the default
+ own_shard_range.lower = 'j'
+ own_shard_range.upper = 'k'
+ broker.merge_shard_ranges([own_shard_range])
+ sharder, mock_swift = call_audit_container()
+ lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[0])
+ self.assertNotIn('account not in shards namespace', lines[0])
+ self.assertNotIn('missing own shard range', lines[0])
+ self.assertIn('root has no matching shard range', lines[0])
+ self.assertNotIn('unable to get shard ranges from root', lines[0])
+ self._assert_stats(expected_stats, sharder, 'audit_shard')
+ self.assertFalse(lines[1:])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self.assertFalse(broker.is_deleted())
+ expected_headers = {'X-Backend-Record-Type': 'shard',
+ 'X-Newest': 'true',
+ 'X-Backend-Include-Deleted': 'True',
+ 'X-Backend-Override-Deleted': 'true'}
+ params = {'format': 'json', 'marker': 'j', 'end_marker': 'k'}
+ mock_swift.make_request.assert_called_once_with(
+ 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,),
+ params=params)
+
+ # create own shard range, failed response from root
+ expected_stats = {'attempted': 1, 'success': 1, 'failure': 0}
+ own_shard_range = broker.get_own_shard_range() # get the default
+ own_shard_range.lower = 'j'
+ own_shard_range.upper = 'k'
+ broker.merge_shard_ranges([own_shard_range])
+ sharder, mock_swift = call_audit_container(
+ exc=internal_client.UnexpectedResponse('bad', 'resp'))
+ lines = sharder.logger.get_lines_for_level('warning')
+ self.assertIn('Failed to get shard ranges', lines[0])
+ self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[1])
+ self.assertNotIn('account not in shards namespace', lines[1])
+ self.assertNotIn('missing own shard range', lines[1])
+ self.assertNotIn('root has no matching shard range', lines[1])
+ self.assertIn('unable to get shard ranges from root', lines[1])
+ self._assert_stats(expected_stats, sharder, 'audit_shard')
+ self.assertFalse(lines[2:])
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self.assertFalse(broker.is_deleted())
+ mock_swift.make_request.assert_called_once_with(
+ 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,),
+ params=params)
+
+ def assert_ok():
+ sharder, mock_swift = call_audit_container()
+ self.assertFalse(sharder.logger.get_lines_for_level('warning'))
+ self.assertFalse(sharder.logger.get_lines_for_level('error'))
+ self._assert_stats(expected_stats, sharder, 'audit_shard')
+ params = {'format': 'json', 'marker': 'k', 'end_marker': 't'}
+ mock_swift.make_request.assert_called_once_with(
+ 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,),
+ params=params)
+
+ # make own shard range match one in root, but different state
+ shard_ranges[1].timestamp = Timestamp.now()
+ broker.merge_shard_ranges([shard_ranges[1]])
+ now = Timestamp.now()
+ shard_ranges[1].update_state(ShardRange.SHARDING, state_timestamp=now)
+ assert_ok()
+ self.assertFalse(broker.is_deleted())
+ # own shard range state is updated from root version
+ own_shard_range = broker.get_own_shard_range()
+ self.assertEqual(ShardRange.SHARDING, own_shard_range.state)
+ self.assertEqual(now, own_shard_range.state_timestamp)
+
+ own_shard_range.update_state(ShardRange.SHARDED,
+ state_timestamp=Timestamp.now())
+ broker.merge_shard_ranges([own_shard_range])
+ assert_ok()
+
+ own_shard_range.deleted = 1
+ own_shard_range.timestamp = Timestamp.now()
+ broker.merge_shard_ranges([own_shard_range])
+ assert_ok()
+ self.assertTrue(broker.is_deleted())
+
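+ # The candidate checks below expect that only ACTIVE shard ranges whose
+ # object count has reached shard_container_threshold are switched to
+ # SHARDING (given an epoch and a new state_timestamp), and that
+ # repeating the call is idempotent.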
+ def test_find_and_enable_sharding_candidates(self):
+ broker = self._make_broker()
+ broker.enable_sharding(next(self.ts_iter))
+ shard_bounds = (('', 'here'), ('here', 'there'), ('there', ''))
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.CLEAVED)
+ shard_ranges[0].state = ShardRange.ACTIVE
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ self.assertTrue(broker.set_sharded_state())
+ with self._mock_sharder() as sharder:
+ sharder._find_and_enable_sharding_candidates(broker)
+
+ # one range just below threshold
+ shard_ranges[0].update_meta(sharder.shard_container_threshold - 1, 0)
+ broker.merge_shard_ranges(shard_ranges[0])
+ with self._mock_sharder() as sharder:
+ sharder._find_and_enable_sharding_candidates(broker)
+ self._assert_shard_ranges_equal(shard_ranges,
+ broker.get_shard_ranges())
+
+ # two ranges above threshold, only one ACTIVE
+ shard_ranges[0].update_meta(sharder.shard_container_threshold, 0)
+ shard_ranges[2].update_meta(sharder.shard_container_threshold + 1, 0)
+ broker.merge_shard_ranges([shard_ranges[0], shard_ranges[2]])
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._find_and_enable_sharding_candidates(broker)
+ expected = shard_ranges[0].copy(state=ShardRange.SHARDING,
+ state_timestamp=now, epoch=now)
+ self._assert_shard_ranges_equal([expected] + shard_ranges[1:],
+ broker.get_shard_ranges())
+
+ # check idempotency
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._find_and_enable_sharding_candidates(broker)
+ self._assert_shard_ranges_equal([expected] + shard_ranges[1:],
+ broker.get_shard_ranges())
+
+ # two ranges above threshold, both ACTIVE
+ shard_ranges[2].update_state(ShardRange.ACTIVE)
+ broker.merge_shard_ranges(shard_ranges[2])
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._find_and_enable_sharding_candidates(broker)
+ expected_2 = shard_ranges[2].copy(state=ShardRange.SHARDING,
+ state_timestamp=now, epoch=now)
+ self._assert_shard_ranges_equal(
+ [expected, shard_ranges[1], expected_2], broker.get_shard_ranges())
+
+ # check idempotency
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._find_and_enable_sharding_candidates(broker)
+ self._assert_shard_ranges_equal(
+ [expected, shard_ranges[1], expected_2], broker.get_shard_ranges())
+
+ def test_find_and_enable_sharding_candidates_bootstrap(self):
+ broker = self._make_broker()
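+ # an empty broker is not yet a sharding candidate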
+ with self._mock_sharder(
+ conf={'shard_container_threshold': 1}) as sharder:
+ sharder._find_and_enable_sharding_candidates(broker)
+ self.assertEqual(ShardRange.ACTIVE, broker.get_own_shard_range().state)
+ broker.put_object('obj', next(self.ts_iter).internal, 1, '', '')
+ self.assertEqual(1, broker.get_info()['object_count'])
+ with self._mock_sharder(
+ conf={'shard_container_threshold': 1}) as sharder:
+ with mock_timestamp_now() as now:
+ sharder._find_and_enable_sharding_candidates(
+ broker, [broker.get_own_shard_range()])
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(ShardRange.SHARDING, own_sr.state)
+ self.assertEqual(now, own_sr.state_timestamp)
+ self.assertEqual(now, own_sr.epoch)
+
+ # check idempotency
+ with self._mock_sharder(
+ conf={'shard_container_threshold': 1}) as sharder:
+ with mock_timestamp_now():
+ sharder._find_and_enable_sharding_candidates(
+ broker, [broker.get_own_shard_range()])
+ own_sr = broker.get_own_shard_range()
+ self.assertEqual(ShardRange.SHARDING, own_sr.state)
+ self.assertEqual(now, own_sr.state_timestamp)
+ self.assertEqual(now, own_sr.epoch)
+
+ def test_find_and_enable_shrinking_candidates(self):
+ broker = self._make_broker()
+ broker.enable_sharding(next(self.ts_iter))
+ shard_bounds = (('', 'here'), ('here', 'there'), ('there', ''))
+ size = (DEFAULT_SHARD_SHRINK_POINT *
+ DEFAULT_SHARD_CONTAINER_THRESHOLD / 100)
+ shard_ranges = self._make_shard_ranges(
+ shard_bounds, state=ShardRange.ACTIVE, object_count=size)
+ broker.merge_shard_ranges(shard_ranges)
+ self.assertTrue(broker.set_sharding_state())
+ self.assertTrue(broker.set_sharded_state())
+ with self._mock_sharder() as sharder:
+ sharder._find_and_enable_shrinking_candidates(broker)
+ self._assert_shard_ranges_equal(shard_ranges,
+ broker.get_shard_ranges())
+
+ # one range just below threshold
+ shard_ranges[0].update_meta(size - 1, 0)
+ broker.merge_shard_ranges(shard_ranges[0])
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._send_shard_ranges = mock.MagicMock()
+ sharder._find_and_enable_shrinking_candidates(broker)
+ acceptor = shard_ranges[1].copy(lower=shard_ranges[0].lower)
+ acceptor.timestamp = now
+ donor = shard_ranges[0].copy(state=ShardRange.SHRINKING,
+ state_timestamp=now, epoch=now)
+ self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]],
+ broker.get_shard_ranges())
+ sharder._send_shard_ranges.assert_has_calls(
+ [mock.call(acceptor.account, acceptor.container, [acceptor]),
+ mock.call(donor.account, donor.container, [donor, acceptor])]
+ )
+
+ # check idempotency
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ sharder._send_shard_ranges = mock.MagicMock()
+ sharder._find_and_enable_shrinking_candidates(broker)
+ self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]],
+ broker.get_shard_ranges())
+ sharder._send_shard_ranges.assert_has_calls(
+ [mock.call(acceptor.account, acceptor.container, [acceptor]),
+ mock.call(donor.account, donor.container, [donor, acceptor])]
+ )
+
+ # acceptor falls below threshold - not a candidate
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ acceptor.update_meta(0, 0, meta_timestamp=now)
+ broker.merge_shard_ranges(acceptor)
+ sharder._send_shard_ranges = mock.MagicMock()
+ sharder._find_and_enable_shrinking_candidates(broker)
+ self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]],
+ broker.get_shard_ranges())
+ sharder._send_shard_ranges.assert_has_calls(
+ [mock.call(acceptor.account, acceptor.container, [acceptor]),
+ mock.call(donor.account, donor.container, [donor, acceptor])]
+ )
+
+ # ...until donor has shrunk
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ donor.update_state(ShardRange.SHARDED, state_timestamp=now)
+ donor.set_deleted(timestamp=now)
+ broker.merge_shard_ranges(donor)
+ sharder._send_shard_ranges = mock.MagicMock()
+ sharder._find_and_enable_shrinking_candidates(broker)
+ new_acceptor = shard_ranges[2].copy(lower=acceptor.lower)
+ new_acceptor.timestamp = now
+ new_donor = acceptor.copy(state=ShardRange.SHRINKING,
+ state_timestamp=now, epoch=now)
+ self._assert_shard_ranges_equal(
+ [donor, new_donor, new_acceptor],
+ broker.get_shard_ranges(include_deleted=True))
+ sharder._send_shard_ranges.assert_has_calls(
+ [mock.call(new_acceptor.account, new_acceptor.container,
+ [new_acceptor]),
+ mock.call(new_donor.account, new_donor.container,
+ [new_donor, new_acceptor])]
+ )
+
+ # ...finally, the last shard shrinks to root
+ with self._mock_sharder() as sharder:
+ with mock_timestamp_now() as now:
+ new_donor.update_state(ShardRange.SHARDED, state_timestamp=now)
+ new_donor.set_deleted(timestamp=now)
+ new_acceptor.update_meta(0, 0, meta_timestamp=now)
+ broker.merge_shard_ranges([new_donor, new_acceptor])
+ sharder._send_shard_ranges = mock.MagicMock()
+ sharder._find_and_enable_shrinking_candidates(broker)
+ final_donor = new_acceptor.copy(state=ShardRange.SHRINKING,
+ state_timestamp=now, epoch=now)
+ self._assert_shard_ranges_equal(
+ [donor, new_donor, final_donor],
+ broker.get_shard_ranges(include_deleted=True))
+ sharder._send_shard_ranges.assert_has_calls(
+ [mock.call(final_donor.account, final_donor.container,
+ [final_donor, broker.get_own_shard_range()])]
+ )
+
+ def test_partition_and_device_filters(self):
+ # verify that the partitions and devices kwargs filter which containers
+ # are processed but do not filter the local device ids.
+ ring = FakeRing()
+ dev_ids = set()
+ container_data = []
+ for dev in ring.devs:
+ dev_ids.add(dev['id'])
+ part = str(dev['id'])
+ broker = self._make_broker(
+ container='c%s' % dev['id'], hash_='c%shash' % dev['id'],
+ device=dev['device'], part=part)
+ broker.update_metadata({'X-Container-Sysmeta-Sharding':
+ ('true', next(self.ts_iter).internal)})
+ container_data.append((broker.path, dev['id'], part))
+
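+ # no filters: all containers on local devices are processed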
+ with self._mock_sharder() as sharder:
+ sharder.ring = ring
+ sharder._check_node = lambda *args: True
+ with mock.patch.object(
+ sharder, '_process_broker') as mock_process_broker:
+ sharder.run_once()
+ self.assertEqual(dev_ids, set(sharder._local_device_ids))
+ self.assertEqual(set(container_data),
+ set((call[0][0].path, call[0][1]['id'], call[0][2])
+ for call in mock_process_broker.call_args_list))
+
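+ # filter to a single partition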
+ with self._mock_sharder() as sharder:
+ sharder.ring = ring
+ sharder._check_node = lambda *args: True
+ with mock.patch.object(
+ sharder, '_process_broker') as mock_process_broker:
+ sharder.run_once(partitions='0')
+ self.assertEqual(dev_ids, set(sharder._local_device_ids))
+ self.assertEqual(set([container_data[0]]),
+ set((call[0][0].path, call[0][1]['id'], call[0][2])
+ for call in mock_process_broker.call_args_list))
+
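+ # filter to multiple partitions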
+ with self._mock_sharder() as sharder:
+ sharder.ring = ring
+ sharder._check_node = lambda *args: True
+ with mock.patch.object(
+ sharder, '_process_broker') as mock_process_broker:
+ sharder.run_once(partitions='2,0')
+ self.assertEqual(dev_ids, set(sharder._local_device_ids))
+ self.assertEqual(set([container_data[0], container_data[2]]),
+ set((call[0][0].path, call[0][1]['id'], call[0][2])
+ for call in mock_process_broker.call_args_list))
+
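+ # combine partition and device filters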
+ with self._mock_sharder() as sharder:
+ sharder.ring = ring
+ sharder._check_node = lambda *args: True
+ with mock.patch.object(
+ sharder, '_process_broker') as mock_process_broker:
+ sharder.run_once(partitions='2,0', devices='sdc')
+ self.assertEqual(dev_ids, set(sharder._local_device_ids))
+ self.assertEqual(set([container_data[2]]),
+ set((call[0][0].path, call[0][1]['id'], call[0][2])
+ for call in mock_process_broker.call_args_list))
+
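+ # filter by devices only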
+ with self._mock_sharder() as sharder:
+ sharder.ring = ring
+ sharder._check_node = lambda *args: True
+ with mock.patch.object(
+ sharder, '_process_broker') as mock_process_broker:
+ sharder.run_once(devices='sdb,sdc')
+ self.assertEqual(dev_ids, set(sharder._local_device_ids))
+ self.assertEqual(set(container_data[1:]),
+ set((call[0][0].path, call[0][1]['id'], call[0][2])
+ for call in mock_process_broker.call_args_list))
+
+
+class TestCleavingContext(BaseTestSharder):
+ def test_init(self):
+ ctx = CleavingContext(ref='test')
+ self.assertEqual('test', ctx.ref)
+ self.assertEqual('', ctx.cursor)
+ self.assertIsNone(ctx.max_row)
+ self.assertIsNone(ctx.cleave_to_row)
+ self.assertIsNone(ctx.last_cleave_to_row)
+ self.assertFalse(ctx.misplaced_done)
+ self.assertFalse(ctx.cleaving_done)
+
+ def test_iter(self):
+ ctx = CleavingContext('test', 'curs', 12, 11, 10, False, True, 0, 4)
+ expected = {'ref': 'test',
+ 'cursor': 'curs',
+ 'max_row': 12,
+ 'cleave_to_row': 11,
+ 'last_cleave_to_row': 10,
+ 'cleaving_done': False,
+ 'misplaced_done': True,
+ 'ranges_done': 0,
+ 'ranges_todo': 4}
+ self.assertEqual(expected, dict(ctx))
+
+ def test_cursor(self):
+ broker = self._make_broker()
+ ref = CleavingContext._make_ref(broker)
+
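+ # cursor values, including non-ascii, are stored utf-8 encoded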
+ for curs in ('curs', u'curs\u00e4\u00fb'):
+ with annotate_failure('%r' % curs):
+ ctx = CleavingContext(ref, curs, 12, 11, 10, False, True)
+ self.assertEqual(curs.encode('utf8'), ctx.cursor)
+ ctx.store(broker)
+ ctx = CleavingContext.load(broker)
+ self.assertEqual(curs.encode('utf8'), ctx.cursor)
+
+ def test_load(self):
+ broker = self._make_broker()
+ for i in range(6):
+ broker.put_object('o%s' % i, next(self.ts_iter).internal, 10,
+ 'text/plain', 'etag_a', 0)
+
+ db_id = broker.get_info()['id']
+ params = {'ref': db_id,
+ 'cursor': 'curs',
+ 'max_row': 2,
+ 'cleave_to_row': 2,
+ 'last_cleave_to_row': 1,
+ 'cleaving_done': False,
+ 'misplaced_done': True,
+ 'ranges_done': 2,
+ 'ranges_todo': 4}
+ key = 'X-Container-Sysmeta-Shard-Context-%s' % db_id
+ broker.update_metadata(
+ {key: (json.dumps(params), Timestamp.now().internal)})
+ ctx = CleavingContext.load(broker)
+ self.assertEqual(db_id, ctx.ref)
+ self.assertEqual('curs', ctx.cursor)
+ # note max_row is dynamically updated during load
+ self.assertEqual(6, ctx.max_row)
+ self.assertEqual(2, ctx.cleave_to_row)
+ self.assertEqual(1, ctx.last_cleave_to_row)
+ self.assertTrue(ctx.misplaced_done)
+ self.assertFalse(ctx.cleaving_done)
+ self.assertEqual(2, ctx.ranges_done)
+ self.assertEqual(4, ctx.ranges_todo)
+
+ def test_store(self):
+ broker = self._make_sharding_broker()
+ old_db_id = broker.get_brokers()[0].get_info()['id']
+ ctx = CleavingContext(old_db_id, 'curs', 12, 11, 2, True, True, 2, 4)
+ ctx.store(broker)
+ key = 'X-Container-Sysmeta-Shard-Context-%s' % old_db_id
+ data = json.loads(broker.metadata[key][0])
+ expected = {'ref': old_db_id,
+ 'cursor': 'curs',
+ 'max_row': 12,
+ 'cleave_to_row': 11,
+ 'last_cleave_to_row': 2,
+ 'cleaving_done': True,
+ 'misplaced_done': True,
+ 'ranges_done': 2,
+ 'ranges_todo': 4}
+ self.assertEqual(expected, data)
+
+ def test_store_add_row_load(self):
+ # adding row to older db changes only max_row in the context
+ broker = self._make_sharding_broker()
+ old_broker = broker.get_brokers()[0]
+ old_db_id = old_broker.get_info()['id']
+ old_broker.merge_items([old_broker._record_to_dict(
+ ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))])
+ old_max_row = old_broker.get_max_row()
+ self.assertEqual(1, old_max_row) # sanity check
+ ctx = CleavingContext(old_db_id, 'curs', 1, 1, 0, True, True)
+ ctx.store(broker)
+
+ # adding a row changes max row
+ old_broker.merge_items([old_broker._record_to_dict(
+ ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))])
+
+ new_ctx = CleavingContext.load(broker)
+ self.assertEqual(old_db_id, new_ctx.ref)
+ self.assertEqual('curs', new_ctx.cursor)
+ self.assertEqual(2, new_ctx.max_row)
+ self.assertEqual(1, new_ctx.cleave_to_row)
+ self.assertEqual(0, new_ctx.last_cleave_to_row)
+ self.assertTrue(new_ctx.misplaced_done)
+ self.assertTrue(new_ctx.cleaving_done)
+
+ def test_store_reclaim_load(self):
+ # reclaiming rows from older db does not change context
+ broker = self._make_sharding_broker()
+ old_broker = broker.get_brokers()[0]
+ old_db_id = old_broker.get_info()['id']
+ old_broker.merge_items([old_broker._record_to_dict(
+ ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))])
+ old_max_row = old_broker.get_max_row()
+ self.assertEqual(1, old_max_row) # sanity check
+ ctx = CleavingContext(old_db_id, 'curs', 1, 1, 0, True, True)
+ ctx.store(broker)
+
+ self.assertEqual(
+ 1, len(old_broker.get_objects()))
+ now = next(self.ts_iter).internal
+ broker.get_brokers()[0].reclaim(now, now)
+ self.assertFalse(old_broker.get_objects())
+
+ new_ctx = CleavingContext.load(broker)
+ self.assertEqual(old_db_id, new_ctx.ref)
+ self.assertEqual('curs', new_ctx.cursor)
+ self.assertEqual(1, new_ctx.max_row)
+ self.assertEqual(1, new_ctx.cleave_to_row)
+ self.assertEqual(0, new_ctx.last_cleave_to_row)
+ self.assertTrue(new_ctx.misplaced_done)
+ self.assertTrue(new_ctx.cleaving_done)
+
+ def test_store_modify_db_id_load(self):
+ # changing id changes ref, so results in a fresh context
+ broker = self._make_sharding_broker()
+ old_broker = broker.get_brokers()[0]
+ old_db_id = old_broker.get_info()['id']
+ ctx = CleavingContext(old_db_id, 'curs', 12, 11, 2, True, True)
+ ctx.store(broker)
+
+ old_broker.newid('fake_remote_id')
+ new_db_id = old_broker.get_info()['id']
+ self.assertNotEqual(old_db_id, new_db_id)
+
+ new_ctx = CleavingContext.load(broker)
+ self.assertEqual(new_db_id, new_ctx.ref)
+ self.assertEqual('', new_ctx.cursor)
+ # note max_row is dynamically updated during load
+ self.assertEqual(-1, new_ctx.max_row)
+ self.assertEqual(None, new_ctx.cleave_to_row)
+ self.assertEqual(None, new_ctx.last_cleave_to_row)
+ self.assertFalse(new_ctx.misplaced_done)
+ self.assertFalse(new_ctx.cleaving_done)
+
+ def test_load_modify_store_load(self):
+ broker = self._make_sharding_broker()
+ old_db_id = broker.get_brokers()[0].get_info()['id']
+ ctx = CleavingContext.load(broker)
+ self.assertEqual(old_db_id, ctx.ref)
+ self.assertEqual('', ctx.cursor) # sanity check
+ ctx.cursor = 'curs'
+ ctx.misplaced_done = True
+ ctx.store(broker)
+ ctx = CleavingContext.load(broker)
+ self.assertEqual(old_db_id, ctx.ref)
+ self.assertEqual('curs', ctx.cursor)
+ self.assertTrue(ctx.misplaced_done)
+
+ def test_reset(self):
+ ctx = CleavingContext('test', 'curs', 12, 11, 2, True, True)
+
+ def check_context():
+ self.assertEqual('test', ctx.ref)
+ self.assertEqual('', ctx.cursor)
+ self.assertEqual(12, ctx.max_row)
+ self.assertEqual(11, ctx.cleave_to_row)
+ self.assertEqual(11, ctx.last_cleave_to_row)
+ self.assertFalse(ctx.misplaced_done)
+ self.assertFalse(ctx.cleaving_done)
+ self.assertEqual(0, ctx.ranges_done)
+ self.assertEqual(0, ctx.ranges_todo)
+ ctx.reset()
+ check_context()
+ # check idempotency
+ ctx.reset()
+ check_context()
+
+ def test_start(self):
+ ctx = CleavingContext('test', 'curs', 12, 11, 2, True, True)
+
+ def check_context():
+ self.assertEqual('test', ctx.ref)
+ self.assertEqual('', ctx.cursor)
+ self.assertEqual(12, ctx.max_row)
+ self.assertEqual(12, ctx.cleave_to_row)
+ self.assertEqual(2, ctx.last_cleave_to_row)
+ self.assertTrue(ctx.misplaced_done) # *not* reset here
+ self.assertFalse(ctx.cleaving_done)
+ self.assertEqual(0, ctx.ranges_done)
+ self.assertEqual(0, ctx.ranges_todo)
+ ctx.start()
+ check_context()
+ # check idempotency
+ ctx.start()
+ check_context()
diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py
index 0571a80724..7a77603f4c 100644
--- a/test/unit/obj/test_server.py
+++ b/test/unit/obj/test_server.py
@@ -1053,7 +1053,7 @@ class TestObjectController(unittest.TestCase):
mock_ring = mock.MagicMock()
mock_ring.get_nodes.return_value = (99, [node])
object_updater.container_ring = mock_ring
- mock_update.return_value = ((True, 1))
+ mock_update.return_value = ((True, 1, None))
object_updater.run_once()
self.assertEqual(1, mock_update.call_count)
self.assertEqual((node, 99, 'PUT', '/a/c/o'),
@@ -1061,6 +1061,7 @@ class TestObjectController(unittest.TestCase):
actual_headers = mock_update.call_args_list[0][0][4]
# User-Agent is updated.
expected_post_headers['User-Agent'] = 'object-updater %s' % os.getpid()
+ expected_post_headers['X-Backend-Accept-Redirect'] = 'true'
self.assertDictEqual(expected_post_headers, actual_headers)
self.assertFalse(
os.listdir(os.path.join(
@@ -1073,6 +1074,104 @@ class TestObjectController(unittest.TestCase):
self._test_PUT_then_POST_async_pendings(
POLICIES[1], update_etag='override_etag')
+ def _check_PUT_redirected_async_pending(self, container_path=None):
+ # When container update is redirected verify that the redirect location
+ # is persisted in the async pending file.
+ policy = POLICIES[0]
+ device_dir = os.path.join(self.testdir, 'sda1')
+ t_put = next(self.ts)
+ update_etag = '098f6bcd4621d373cade4e832627b4f6'
+
+ put_headers = {
+ 'X-Trans-Id': 'put_trans_id',
+ 'X-Timestamp': t_put.internal,
+ 'Content-Type': 'application/octet-stream;swift_bytes=123456789',
+ 'Content-Length': '4',
+ 'X-Backend-Storage-Policy-Index': int(policy),
+ 'X-Container-Host': 'chost:3200',
+ 'X-Container-Partition': '99',
+ 'X-Container-Device': 'cdevice'}
+
+ if container_path:
+ # the proxy may include this header
+ put_headers['X-Backend-Container-Path'] = container_path
+ expected_update_path = '/cdevice/99/%s/o' % container_path
+ else:
+ expected_update_path = '/cdevice/99/a/c/o'
+
+ if policy.policy_type == EC_POLICY:
+ put_headers.update({
+ 'X-Object-Sysmeta-Ec-Frag-Index': '2',
+ 'X-Backend-Container-Update-Override-Etag': update_etag,
+ 'X-Object-Sysmeta-Ec-Etag': update_etag})
+
+ req = Request.blank('/sda1/p/a/c/o',
+ environ={'REQUEST_METHOD': 'PUT'},
+ headers=put_headers, body='test')
+ resp_headers = {'Location': '/.sharded_a/c_shard_1/o',
+ 'X-Backend-Redirect-Timestamp': next(self.ts).internal}
+
+ with mocked_http_conn(301, headers=[resp_headers]) as conn, \
+ mock.patch('swift.common.utils.HASH_PATH_PREFIX', ''),\
+ fake_spawn():
+ resp = req.get_response(self.object_controller)
+
+ self.assertEqual(resp.status_int, 201)
+ self.assertEqual(1, len(conn.requests))
+
+ self.assertEqual(expected_update_path, conn.requests[0]['path'])
+
+ # whether or not an X-Backend-Container-Path was received from the
+ # proxy, the async pending file should now have the container_path
+ # equal to the Location header received in the update response.
+ async_pending_file_put = os.path.join(
+ device_dir, diskfile.get_async_dir(policy), 'a83',
+ '06fbf0b514e5199dfc4e00f42eb5ea83-%s' % t_put.internal)
+ self.assertTrue(os.path.isfile(async_pending_file_put),
+ 'Expected %s to be a file but it is not.'
+ % async_pending_file_put)
+ expected_put_headers = {
+ 'Referer': 'PUT http://localhost/sda1/p/a/c/o',
+ 'X-Trans-Id': 'put_trans_id',
+ 'X-Timestamp': t_put.internal,
+ 'X-Content-Type': 'application/octet-stream;swift_bytes=123456789',
+ 'X-Size': '4',
+ 'X-Etag': '098f6bcd4621d373cade4e832627b4f6',
+ 'User-Agent': 'object-server %s' % os.getpid(),
+ 'X-Backend-Storage-Policy-Index': '%d' % int(policy)}
+ if policy.policy_type == EC_POLICY:
+ expected_put_headers['X-Etag'] = update_etag
+ self.assertEqual(
+ {'headers': expected_put_headers,
+ 'account': 'a', 'container': 'c', 'obj': 'o', 'op': 'PUT',
+ 'container_path': '.sharded_a/c_shard_1'},
+ pickle.load(open(async_pending_file_put)))
+
+ # when updater is run its first request will be to the redirect
+ # location that is persisted in the async pending file
+ with mocked_http_conn(201) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache',
+ lambda *args: None):
+ object_updater = updater.ObjectUpdater(
+ {'devices': self.testdir,
+ 'mount_check': 'false'}, logger=debug_logger())
+ node = {'id': 1, 'ip': 'chost', 'port': 3200,
+ 'device': 'cdevice'}
+ mock_ring = mock.MagicMock()
+ mock_ring.get_nodes.return_value = (99, [node])
+ object_updater.container_ring = mock_ring
+ object_updater.run_once()
+
+ self.assertEqual(1, len(conn.requests))
+ self.assertEqual('/cdevice/99/.sharded_a/c_shard_1/o',
+ conn.requests[0]['path'])
+
+ def test_PUT_redirected_async_pending(self):
+ self._check_PUT_redirected_async_pending()
+
+ def test_PUT_redirected_async_pending_with_container_path(self):
+ self._check_PUT_redirected_async_pending(container_path='.another/c')
+
def test_POST_quarantine_zbyte(self):
timestamp = normalize_timestamp(time())
req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'},
@@ -5263,6 +5362,95 @@ class TestObjectController(unittest.TestCase):
'X-Backend-Container-Update-Override-Content-Type': 'ignored',
'X-Backend-Container-Update-Override-Foo': 'ignored'})
+ def test_PUT_container_update_to_shard(self):
+ # verify that alternate container update path is respected when
+ # included in request headers
+ def do_test(container_path, expected_path, expected_container_path):
+ policy = random.choice(list(POLICIES))
+ container_updates = []
+
+ def capture_updates(
+ ip, port, method, path, headers, *args, **kwargs):
+ container_updates.append((ip, port, method, path, headers))
+
+ pickle_async_update_args = []
+
+ def fake_pickle_async_update(*args):
+ pickle_async_update_args.append(args)
+
+ diskfile_mgr = self.object_controller._diskfile_router[policy]
+ diskfile_mgr.pickle_async_update = fake_pickle_async_update
+
+ ts_put = next(self.ts)
+ headers = {
+ 'X-Timestamp': ts_put.internal,
+ 'X-Trans-Id': '123',
+ 'X-Container-Host': 'chost:cport',
+ 'X-Container-Partition': 'cpartition',
+ 'X-Container-Device': 'cdevice',
+ 'Content-Type': 'text/plain',
+ 'X-Object-Sysmeta-Ec-Frag-Index': 0,
+ 'X-Backend-Storage-Policy-Index': int(policy),
+ }
+ if container_path is not None:
+ headers['X-Backend-Container-Path'] = container_path
+
+ req = Request.blank('/sda1/0/a/c/o', method='PUT',
+ headers=headers, body='')
+ with mocked_http_conn(
+ 500, give_connect=capture_updates) as fake_conn:
+ with fake_spawn():
+ resp = req.get_response(self.object_controller)
+ self.assertRaises(StopIteration, fake_conn.code_iter.next)
+ self.assertEqual(resp.status_int, 201)
+ self.assertEqual(len(container_updates), 1)
+ # verify expected path used in update request
+ ip, port, method, path, headers = container_updates[0]
+ self.assertEqual(ip, 'chost')
+ self.assertEqual(port, 'cport')
+ self.assertEqual(method, 'PUT')
+ self.assertEqual(path, '/cdevice/cpartition/%s/o' % expected_path)
+
+ # verify that the pickled update *always* has the root container
+ self.assertEqual(1, len(pickle_async_update_args))
+ (objdevice, account, container, obj, data, timestamp,
+ policy) = pickle_async_update_args[0]
+ self.assertEqual(objdevice, 'sda1')
+ self.assertEqual(account, 'a') # NB user account
+ self.assertEqual(container, 'c') # NB root container
+ self.assertEqual(obj, 'o')
+ self.assertEqual(timestamp, ts_put.internal)
+ self.assertEqual(policy, policy)
+ expected_data = {
+ 'headers': HeaderKeyDict({
+ 'X-Size': '0',
+ 'User-Agent': 'object-server %s' % os.getpid(),
+ 'X-Content-Type': 'text/plain',
+ 'X-Timestamp': ts_put.internal,
+ 'X-Trans-Id': '123',
+ 'Referer': 'PUT http://localhost/sda1/0/a/c/o',
+ 'X-Backend-Storage-Policy-Index': int(policy),
+ 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'}),
+ 'obj': 'o',
+ 'account': 'a',
+ 'container': 'c',
+ 'op': 'PUT'}
+ if expected_container_path:
+ expected_data['container_path'] = expected_container_path
+ self.assertEqual(expected_data, data)
+
+ do_test('a_shard/c_shard', 'a_shard/c_shard', 'a_shard/c_shard')
+ do_test('', 'a/c', None)
+ do_test(None, 'a/c', None)
+ # TODO: should these cases trigger a 400 response rather than
+ # defaulting to root path?
+ do_test('garbage', 'a/c', None)
+ do_test('/', 'a/c', None)
+ do_test('/no-acct', 'a/c', None)
+ do_test('no-cont/', 'a/c', None)
+ do_test('too/many/parts', 'a/c', None)
+ do_test('/leading/slash', 'a/c', None)
+
def test_container_update_async(self):
policy = random.choice(list(POLICIES))
req = Request.blank(
@@ -5335,23 +5523,21 @@ class TestObjectController(unittest.TestCase):
'X-Container-Partition': '20',
'X-Container-Host': '1.2.3.4:5',
'X-Container-Device': 'sdb1'})
- with mock.patch.object(object_server, 'spawn',
- local_fake_spawn):
- with mock.patch.object(self.object_controller,
- 'async_update',
- local_fake_async_update):
- resp = req.get_response(self.object_controller)
- # check the response is completed and successful
- self.assertEqual(resp.status_int, 201)
- # check that async_update hasn't been called
- self.assertFalse(len(called_async_update_args))
- # now do the work in greenthreads
- for func, a, kw in saved_spawn_calls:
- gt = spawn(func, *a, **kw)
- greenthreads.append(gt)
- # wait for the greenthreads to finish
- for gt in greenthreads:
- gt.wait()
+ with mock.patch.object(object_server, 'spawn', local_fake_spawn), \
+ mock.patch.object(self.object_controller, 'async_update',
+ local_fake_async_update):
+ resp = req.get_response(self.object_controller)
+ # check the response is completed and successful
+ self.assertEqual(resp.status_int, 201)
+ # check that async_update hasn't been called
+ self.assertFalse(len(called_async_update_args))
+ # now do the work in greenthreads
+ for func, a, kw in saved_spawn_calls:
+ gt = spawn(func, *a, **kw)
+ greenthreads.append(gt)
+ # wait for the greenthreads to finish
+ for gt in greenthreads:
+ gt.wait()
# check that the calls to async_update have happened
headers_out = {'X-Size': '0',
'X-Content-Type': 'application/burrito',
@@ -5362,7 +5548,8 @@ class TestObjectController(unittest.TestCase):
'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'}
expected = [('PUT', 'a', 'c', 'o', '1.2.3.4:5', '20', 'sdb1',
headers_out, 'sda1', POLICIES[0]),
- {'logger_thread_locals': (None, None)}]
+ {'logger_thread_locals': (None, None),
+ 'container_path': None}]
self.assertEqual(called_async_update_args, [expected])
def test_container_update_as_greenthread_with_timeout(self):
diff --git a/test/unit/obj/test_updater.py b/test/unit/obj/test_updater.py
index aac6325254..ae51153b8e 100644
--- a/test/unit/obj/test_updater.py
+++ b/test/unit/obj/test_updater.py
@@ -65,7 +65,9 @@ class TestObjectUpdater(unittest.TestCase):
{'id': 1, 'ip': '127.0.0.1', 'port': 1,
'device': 'sda1', 'zone': 2},
{'id': 2, 'ip': '127.0.0.1', 'port': 1,
- 'device': 'sda1', 'zone': 4}], 30),
+ 'device': 'sda1', 'zone': 4},
+ {'id': 3, 'ip': '127.0.0.1', 'port': 1,
+ 'device': 'sda1', 'zone': 6}], 30),
f)
self.devices_dir = os.path.join(self.testdir, 'devices')
os.mkdir(self.devices_dir)
@@ -74,6 +76,7 @@ class TestObjectUpdater(unittest.TestCase):
for policy in POLICIES:
os.mkdir(os.path.join(self.sda1, get_tmp_dir(policy)))
self.logger = debug_logger()
+ self.ts_iter = make_timestamp_iter()
def tearDown(self):
rmtree(self.testdir, ignore_errors=1)
@@ -299,19 +302,22 @@ class TestObjectUpdater(unittest.TestCase):
self.assertIn("sweep progress", info_lines[1])
# the space ensures it's a positive number
self.assertIn(
- "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 error",
+ "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 errors, "
+ "0 redirects",
info_lines[1])
self.assertIn(self.sda1, info_lines[1])
self.assertIn("sweep progress", info_lines[2])
self.assertIn(
- "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 error",
+ "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 errors, "
+ "0 redirects",
info_lines[2])
self.assertIn(self.sda1, info_lines[2])
self.assertIn("sweep complete", info_lines[3])
self.assertIn(
- "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 error",
+ "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 errors, "
+ "0 redirects",
info_lines[3])
self.assertIn(self.sda1, info_lines[3])
@@ -547,6 +553,26 @@ class TestObjectUpdater(unittest.TestCase):
{'successes': 1, 'unlinks': 1,
'async_pendings': 1})
+ def _write_async_update(self, dfmanager, timestamp, policy,
+ headers=None, container_path=None):
+ # write an async
+ account, container, obj = 'a', 'c', 'o'
+ op = 'PUT'
+ headers_out = headers or {
+ 'x-size': 0,
+ 'x-content-type': 'text/plain',
+ 'x-etag': 'd41d8cd98f00b204e9800998ecf8427e',
+ 'x-timestamp': timestamp.internal,
+ 'X-Backend-Storage-Policy-Index': int(policy),
+ 'User-Agent': 'object-server %s' % os.getpid()
+ }
+ data = {'op': op, 'account': account, 'container': container,
+ 'obj': obj, 'headers': headers_out}
+ if container_path:
+ data['container_path'] = container_path
+ dfmanager.pickle_async_update(self.sda1, account, container, obj,
+ data, timestamp, policy)
+
def test_obj_put_async_updates(self):
ts_iter = make_timestamp_iter()
policies = list(POLICIES)
@@ -562,16 +588,12 @@ class TestObjectUpdater(unittest.TestCase):
async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
os.mkdir(async_dir)
- def do_test(headers_out, expected):
+ def do_test(headers_out, expected, container_path=None):
# write an async
dfmanager = DiskFileManager(conf, daemon.logger)
- account, container, obj = 'a', 'c', 'o'
- op = 'PUT'
- data = {'op': op, 'account': account, 'container': container,
- 'obj': obj, 'headers': headers_out}
- dfmanager.pickle_async_update(self.sda1, account, container, obj,
- data, next(ts_iter), policies[0])
-
+ self._write_async_update(dfmanager, next(ts_iter), policies[0],
+ headers=headers_out,
+ container_path=container_path)
request_log = []
def capture(*args, **kwargs):
@@ -613,11 +635,21 @@ class TestObjectUpdater(unittest.TestCase):
'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e',
'X-Timestamp': ts.normal,
'X-Backend-Storage-Policy-Index': str(int(policies[0])),
- 'User-Agent': 'object-updater %s' % os.getpid()
+ 'User-Agent': 'object-updater %s' % os.getpid(),
+ 'X-Backend-Accept-Redirect': 'true',
}
+ # always expect X-Backend-Accept-Redirect to be true
+ do_test(headers_out, expected, container_path='.shards_a/shard_c')
do_test(headers_out, expected)
+ # ...unless X-Backend-Accept-Redirect is already set
+ expected['X-Backend-Accept-Redirect'] = 'false'
+ headers_out_2 = dict(headers_out)
+ headers_out_2['X-Backend-Accept-Redirect'] = 'false'
+ do_test(headers_out_2, expected)
+
# updater should add policy header if missing
+ expected['X-Backend-Accept-Redirect'] = 'true'
headers_out['X-Backend-Storage-Policy-Index'] = None
do_test(headers_out, expected)
@@ -632,6 +664,414 @@ class TestObjectUpdater(unittest.TestCase):
'X-Backend-Storage-Policy-Index')
do_test(headers_out, expected)
+ def _check_update_requests(self, requests, timestamp, policy):
+ # do some sanity checks on update request
+ expected_headers = {
+ 'X-Size': '0',
+ 'X-Content-Type': 'text/plain',
+ 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e',
+ 'X-Timestamp': timestamp.internal,
+ 'X-Backend-Storage-Policy-Index': str(int(policy)),
+ 'User-Agent': 'object-updater %s' % os.getpid(),
+ 'X-Backend-Accept-Redirect': 'true'}
+ for request in requests:
+ self.assertEqual('PUT', request['method'])
+ self.assertDictEqual(expected_headers, request['headers'])
+
+ def test_obj_put_async_root_update_redirected(self):
+ policies = list(POLICIES)
+ random.shuffle(policies)
+ # setup updater
+ conf = {
+ 'devices': self.devices_dir,
+ 'mount_check': 'false',
+ 'swift_dir': self.testdir,
+ }
+ daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+ async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
+ os.mkdir(async_dir)
+ dfmanager = DiskFileManager(conf, daemon.logger)
+
+ ts_obj = next(self.ts_iter)
+ self._write_async_update(dfmanager, ts_obj, policies[0])
+
+ # run once
+ ts_redirect_1 = next(self.ts_iter)
+ ts_redirect_2 = next(self.ts_iter)
+ fake_responses = [
+ # first round of update attempts, newest redirect should be chosen
+ (200, {}),
+ (301, {'Location': '/.shards_a/c_shard_new/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_2.internal}),
+ (301, {'Location': '/.shards_a/c_shard_old/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}),
+ # second round of update attempts
+ (200, {}),
+ (200, {}),
+ (200, {}),
+ ]
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+
+ self._check_update_requests(conn.requests[:3], ts_obj, policies[0])
+ self._check_update_requests(conn.requests[3:], ts_obj, policies[0])
+ self.assertEqual(['/sda1/0/a/c/o'] * 3 +
+ ['/sda1/0/.shards_a/c_shard_new/o'] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 1, 'successes': 1,
+ 'unlinks': 1, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ self.assertFalse(os.listdir(async_dir)) # no async file
+
+ def test_obj_put_async_root_update_redirected_previous_success(self):
+ policies = list(POLICIES)
+ random.shuffle(policies)
+ # setup updater
+ conf = {
+ 'devices': self.devices_dir,
+ 'mount_check': 'false',
+ 'swift_dir': self.testdir,
+ }
+ daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+ async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
+ os.mkdir(async_dir)
+ dfmanager = DiskFileManager(conf, daemon.logger)
+
+ ts_obj = next(self.ts_iter)
+ self._write_async_update(dfmanager, ts_obj, policies[0])
+ orig_async_path, orig_async_data = self._check_async_file(async_dir)
+
+ # run once
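+ # two nodes fail (507) and one succeeds; the success is recorded in the
+ # async pending data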
+ with mocked_http_conn(
+ 507, 200, 507) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+
+ self._check_update_requests(conn.requests, ts_obj, policies[0])
+ self.assertEqual(['/sda1/0/a/c/o'] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'failures': 1, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ async_path, async_data = self._check_async_file(async_dir)
+ self.assertEqual(dict(orig_async_data, successes=[1]), async_data)
+
+ # run again - expect 3 redirected updates despite previous success
+ ts_redirect = next(self.ts_iter)
+ resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect.internal}
+ fake_responses = (
+ # 1st round of redirects, 2nd round of redirects
+ [(301, resp_headers_1)] * 2 + [(200, {})] * 3)
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+
+ self._check_update_requests(conn.requests[:2], ts_obj, policies[0])
+ self._check_update_requests(conn.requests[2:], ts_obj, policies[0])
+ root_part = daemon.container_ring.get_part('a/c')
+ shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1')
+ self.assertEqual(
+ ['/sda1/%s/a/c/o' % root_part] * 2 +
+ ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 1, 'successes': 1, 'failures': 1, 'unlinks': 1,
+ 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ self.assertFalse(os.listdir(async_dir)) # no async file
+
+ def _check_async_file(self, async_dir):
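+ # there should be exactly one async pending file; return its path and
+ # unpickled data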
+ async_subdirs = os.listdir(async_dir)
+ self.assertEqual([mock.ANY], async_subdirs)
+ async_files = os.listdir(os.path.join(async_dir, async_subdirs[0]))
+ self.assertEqual([mock.ANY], async_files)
+ async_path = os.path.join(
+ async_dir, async_subdirs[0], async_files[0])
+ with open(async_path) as fd:
+ async_data = pickle.load(fd)
+ return async_path, async_data
+
+ def _check_obj_put_async_update_bad_redirect_headers(self, headers):
+ policies = list(POLICIES)
+ random.shuffle(policies)
+ # setup updater
+ conf = {
+ 'devices': self.devices_dir,
+ 'mount_check': 'false',
+ 'swift_dir': self.testdir,
+ }
+ daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+ async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
+ os.mkdir(async_dir)
+ dfmanager = DiskFileManager(conf, daemon.logger)
+
+ ts_obj = next(self.ts_iter)
+ self._write_async_update(dfmanager, ts_obj, policies[0])
+ orig_async_path, orig_async_data = self._check_async_file(async_dir)
+
+ fake_responses = [
+ (301, headers),
+ (301, headers),
+ (301, headers),
+ ]
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+
+ self._check_update_requests(conn.requests, ts_obj, policies[0])
+ self.assertEqual(['/sda1/0/a/c/o'] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'failures': 1, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ # async file still intact
+ async_path, async_data = self._check_async_file(async_dir)
+ self.assertEqual(orig_async_path, async_path)
+ self.assertEqual(orig_async_data, async_data)
+ return daemon
+
+ def test_obj_put_async_root_update_missing_location_header(self):
+ headers = {
+ 'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal}
+ self._check_obj_put_async_update_bad_redirect_headers(headers)
+
+ def test_obj_put_async_root_update_bad_location_header(self):
+ headers = {
+ 'Location': 'bad bad bad',
+ 'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal}
+ daemon = self._check_obj_put_async_update_bad_redirect_headers(headers)
+ error_lines = daemon.logger.get_lines_for_level('error')
+ self.assertIn('Container update failed', error_lines[0])
+ self.assertIn('Invalid path: bad%20bad%20bad', error_lines[0])
+
+ def test_obj_put_async_shard_update_redirected_twice(self):
+ policies = list(POLICIES)
+ random.shuffle(policies)
+ # setup updater
+ conf = {
+ 'devices': self.devices_dir,
+ 'mount_check': 'false',
+ 'swift_dir': self.testdir,
+ }
+ daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+ async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
+ os.mkdir(async_dir)
+ dfmanager = DiskFileManager(conf, daemon.logger)
+
+ ts_obj = next(self.ts_iter)
+ self._write_async_update(dfmanager, ts_obj, policies[0],
+ container_path='.shards_a/c_shard_older')
+ orig_async_path, orig_async_data = self._check_async_file(async_dir)
+
+ # run once
+ ts_redirect_1 = next(self.ts_iter)
+ ts_redirect_2 = next(self.ts_iter)
+ ts_redirect_3 = next(self.ts_iter)
+ fake_responses = [
+ # 1st round of redirects, newest redirect should be chosen
+ (301, {'Location': '/.shards_a/c_shard_old/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}),
+ (301, {'Location': '/.shards_a/c_shard_new/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_2.internal}),
+ (301, {'Location': '/.shards_a/c_shard_old/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}),
+ # 2nd round of redirects
+ (301, {'Location': '/.shards_a/c_shard_newer/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}),
+ (301, {'Location': '/.shards_a/c_shard_newer/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}),
+ (301, {'Location': '/.shards_a/c_shard_newer/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}),
+ ]
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+
+ self._check_update_requests(conn.requests, ts_obj, policies[0])
+ # only *one* set of redirected requests is attempted per cycle
+ older_part = daemon.container_ring.get_part('.shards_a/c_shard_older')
+ new_part = daemon.container_ring.get_part('.shards_a/c_shard_new')
+ newer_part = daemon.container_ring.get_part('.shards_a/c_shard_newer')
+ self.assertEqual(
+ ['/sda1/%s/.shards_a/c_shard_older/o' % older_part] * 3 +
+ ['/sda1/%s/.shards_a/c_shard_new/o' % new_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 2, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ # update failed, we still have pending file with most recent redirect
+ # response Location header value added to data
+ async_path, async_data = self._check_async_file(async_dir)
+ self.assertEqual(orig_async_path, async_path)
+ self.assertEqual(
+ dict(orig_async_data, container_path='.shards_a/c_shard_newer',
+ redirect_history=['.shards_a/c_shard_new',
+ '.shards_a/c_shard_newer']),
+ async_data)
+
+ # next cycle, should get latest redirect from pickled async update
+ fake_responses = [(200, {})] * 3
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+
+ self._check_update_requests(conn.requests, ts_obj, policies[0])
+ self.assertEqual(
+ ['/sda1/%s/.shards_a/c_shard_newer/o' % newer_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 2, 'successes': 1, 'unlinks': 1,
+ 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ self.assertFalse(os.listdir(async_dir)) # no async file
+
+ def test_obj_put_async_update_redirection_loop(self):
+ policies = list(POLICIES)
+ random.shuffle(policies)
+ # setup updater
+ conf = {
+ 'devices': self.devices_dir,
+ 'mount_check': 'false',
+ 'swift_dir': self.testdir,
+ }
+ daemon = object_updater.ObjectUpdater(conf, logger=self.logger)
+ async_dir = os.path.join(self.sda1, get_async_dir(policies[0]))
+ os.mkdir(async_dir)
+ dfmanager = DiskFileManager(conf, daemon.logger)
+
+ ts_obj = next(self.ts_iter)
+ self._write_async_update(dfmanager, ts_obj, policies[0])
+ orig_async_path, orig_async_data = self._check_async_file(async_dir)
+
+ # run once
+ ts_redirect = next(self.ts_iter)
+
+ resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect.internal}
+ resp_headers_2 = {'Location': '/.shards_a/c_shard_2/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect.internal}
+ fake_responses = (
+ # 1st round of redirects, 2nd round of redirects
+ [(301, resp_headers_1)] * 3 + [(301, resp_headers_2)] * 3)
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+ self._check_update_requests(conn.requests[:3], ts_obj, policies[0])
+ self._check_update_requests(conn.requests[3:], ts_obj, policies[0])
+ # only *one* set of redirected requests is attempted per cycle
+ root_part = daemon.container_ring.get_part('a/c')
+ shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1')
+ shard_2_part = daemon.container_ring.get_part('.shards_a/c_shard_2')
+ shard_3_part = daemon.container_ring.get_part('.shards_a/c_shard_3')
+ self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3 +
+ ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 2, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ # update failed, we still have pending file with most recent redirect
+ # response Location header value added to data
+ async_path, async_data = self._check_async_file(async_dir)
+ self.assertEqual(orig_async_path, async_path)
+ self.assertEqual(
+ dict(orig_async_data, container_path='.shards_a/c_shard_2',
+ redirect_history=['.shards_a/c_shard_1',
+ '.shards_a/c_shard_2']),
+ async_data)
+
+ # next cycle, more redirects! first is to previously visited location
+ resp_headers_3 = {'Location': '/.shards_a/c_shard_3/o',
+ 'X-Backend-Redirect-Timestamp': ts_redirect.internal}
+ fake_responses = (
+ # 1st round of redirects, 2nd round of redirects
+ [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3)
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+ self._check_update_requests(conn.requests[:3], ts_obj, policies[0])
+ self._check_update_requests(conn.requests[3:], ts_obj, policies[0])
+ # first try the previously persisted container path; the response to
+ # that creates a loop, so it is ignored and the update is sent to root
+ self.assertEqual(
+ ['/sda1/%s/.shards_a/c_shard_2/o' % shard_2_part] * 3 +
+ ['/sda1/%s/a/c/o' % root_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 4, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ # update failed, we still have pending file with most recent redirect
+ # response Location header value from root added to persisted data
+ async_path, async_data = self._check_async_file(async_dir)
+ self.assertEqual(orig_async_path, async_path)
+ # note: redirect_history was reset when falling back to root
+ self.assertEqual(
+ dict(orig_async_data, container_path='.shards_a/c_shard_3',
+ redirect_history=['.shards_a/c_shard_3']),
+ async_data)
+
+ # next cycle, more redirects! The first is to a location visited
+ # previously but not since the last fall back to root, so that location
+ # IS tried; the second is to a location visited since the last fall
+ # back to root, so that location is NOT tried
+ fake_responses = (
+ # 1st round of redirects, 2nd round of redirects
+ [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3)
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+ self._check_update_requests(conn.requests, ts_obj, policies[0])
+ self.assertEqual(
+ ['/sda1/%s/.shards_a/c_shard_3/o' % shard_3_part] * 3 +
+ ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 6, 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ # update failed, we still have pending file, but container_path is None
+ # because most recent redirect location was a repeat
+ async_path, async_data = self._check_async_file(async_dir)
+ self.assertEqual(orig_async_path, async_path)
+ self.assertEqual(
+ dict(orig_async_data, container_path=None,
+ redirect_history=[]),
+ async_data)
+
+ # next cycle, persisted container path is None so update should go to
+ # root, this time it succeeds
+ fake_responses = [(200, {})] * 3
+ fake_status_codes, fake_headers = zip(*fake_responses)
+ with mocked_http_conn(
+ *fake_status_codes, headers=fake_headers) as conn:
+ with mock.patch('swift.obj.updater.dump_recon_cache'):
+ daemon.run_once()
+ self._check_update_requests(conn.requests, ts_obj, policies[0])
+ self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3,
+ [req['path'] for req in conn.requests])
+ self.assertEqual(
+ {'redirects': 6, 'successes': 1, 'unlinks': 1,
+ 'async_pendings': 1},
+ daemon.logger.get_increment_counts())
+ self.assertFalse(os.listdir(async_dir)) # no async file
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/unit/proxy/controllers/test_base.py b/test/unit/proxy/controllers/test_base.py
index 60d17c9ec8..93d71f6288 100644
--- a/test/unit/proxy/controllers/test_base.py
+++ b/test/unit/proxy/controllers/test_base.py
@@ -14,6 +14,7 @@
# limitations under the License.
import itertools
+import json
from collections import defaultdict
import unittest
import mock
@@ -23,11 +24,14 @@ from swift.proxy.controllers.base import headers_to_container_info, \
Controller, GetOrHeadHandler, bytes_to_skip
from swift.common.swob import Request, HTTPException, RESPONSE_REASONS
from swift.common import exceptions
-from swift.common.utils import split_path
+from swift.common.utils import split_path, ShardRange, Timestamp
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.http import is_success
from swift.common.storage_policy import StoragePolicy, StoragePolicyCollection
-from test.unit import fake_http_connect, FakeRing, FakeMemcache, PatchPolicies
+from test.unit import (
+ fake_http_connect, FakeRing, FakeMemcache, PatchPolicies, FakeLogger,
+ make_timestamp_iter,
+ mocked_http_conn)
from swift.proxy import server as proxy_server
from swift.common.request_helpers import (
get_sys_meta_prefix, get_object_transient_sysmeta
@@ -172,7 +176,8 @@ class TestFuncs(unittest.TestCase):
def setUp(self):
self.app = proxy_server.Application(None, FakeMemcache(),
account_ring=FakeRing(),
- container_ring=FakeRing())
+ container_ring=FakeRing(),
+ logger=FakeLogger())
def test_get_info_zero_recheck(self):
mock_cache = mock.Mock()
@@ -1030,3 +1035,146 @@ class TestFuncs(unittest.TestCase):
# prime numbers
self.assertEqual(bytes_to_skip(11, 7), 4)
self.assertEqual(bytes_to_skip(97, 7873823), 55)
+
+ def test_get_shard_ranges_for_container_get(self):
+ ts_iter = make_timestamp_iter()
+ shard_ranges = [dict(ShardRange(
+ '.sharded_a/sr%d' % i, next(ts_iter), '%d_lower' % i,
+ '%d_upper' % i, object_count=i, bytes_used=1024 * i,
+ meta_timestamp=next(ts_iter)))
+ for i in range(3)]
+ base = Controller(self.app)
+ req = Request.blank('/v1/a/c', method='GET')
+ resp_headers = {'X-Backend-Record-Type': 'shard'}
+ with mocked_http_conn(
+ 200, 200, body_iter=iter(['', json.dumps(shard_ranges)]),
+ headers=resp_headers
+ ) as fake_conn:
+ actual = base._get_shard_ranges(req, 'a', 'c')
+
+ # account info
+ captured = fake_conn.requests
+ self.assertEqual('HEAD', captured[0]['method'])
+ self.assertEqual('a', captured[0]['path'][7:])
+ # container GET
+ self.assertEqual('GET', captured[1]['method'])
+ self.assertEqual('a/c', captured[1]['path'][7:])
+ self.assertEqual('format=json', captured[1]['qs'])
+ self.assertEqual(
+ 'shard', captured[1]['headers'].get('X-Backend-Record-Type'))
+ self.assertEqual(shard_ranges, [dict(pr) for pr in actual])
+ self.assertFalse(self.app.logger.get_lines_for_level('error'))
+
+ def test_get_shard_ranges_for_object_put(self):
+ ts_iter = make_timestamp_iter()
+ shard_ranges = [dict(ShardRange(
+ '.sharded_a/sr%d' % i, next(ts_iter), '%d_lower' % i,
+ '%d_upper' % i, object_count=i, bytes_used=1024 * i,
+ meta_timestamp=next(ts_iter)))
+ for i in range(3)]
+ base = Controller(self.app)
+ req = Request.blank('/v1/a/c/o', method='PUT')
+ resp_headers = {'X-Backend-Record-Type': 'shard'}
+ with mocked_http_conn(
+ 200, 200, body_iter=iter(['', json.dumps(shard_ranges[1:2])]),
+ headers=resp_headers
+ ) as fake_conn:
+ actual = base._get_shard_ranges(req, 'a', 'c', '1_test')
+
+ # account info
+ captured = fake_conn.requests
+ self.assertEqual('HEAD', captured[0]['method'])
+ self.assertEqual('a', captured[0]['path'][7:])
+ # container GET
+ self.assertEqual('GET', captured[1]['method'])
+ self.assertEqual('a/c', captured[1]['path'][7:])
+ params = sorted(captured[1]['qs'].split('&'))
+ self.assertEqual(
+ ['format=json', 'includes=1_test'], params)
+ self.assertEqual(
+ 'shard', captured[1]['headers'].get('X-Backend-Record-Type'))
+ self.assertEqual(shard_ranges[1:2], [dict(pr) for pr in actual])
+ self.assertFalse(self.app.logger.get_lines_for_level('error'))
+
+ def _check_get_shard_ranges_bad_data(self, body):
+ base = Controller(self.app)
+ req = Request.blank('/v1/a/c/o', method='PUT')
+ # account HEAD response, then container GET returning the bad body
+ headers = {'X-Backend-Record-Type': 'shard'}
+ with mocked_http_conn(200, 200, body_iter=iter(['', body]),
+ headers=headers):
+ actual = base._get_shard_ranges(req, 'a', 'c', '1_test')
+ self.assertIsNone(actual)
+ lines = self.app.logger.get_lines_for_level('error')
+ return lines
+
+ def test_get_shard_ranges_empty_body(self):
+ error_lines = self._check_get_shard_ranges_bad_data('')
+ self.assertIn('Problem with listing response', error_lines[0])
+ self.assertIn('No JSON', error_lines[0])
+ self.assertFalse(error_lines[1:])
+
+ def test_get_shard_ranges_not_a_list(self):
+ error_lines = self._check_get_shard_ranges_bad_data(json.dumps({}))
+ self.assertIn('Problem with listing response', error_lines[0])
+ self.assertIn('not a list', error_lines[0])
+ self.assertFalse(error_lines[1:])
+
+ def test_get_shard_ranges_key_missing(self):
+ error_lines = self._check_get_shard_ranges_bad_data(json.dumps([{}]))
+ self.assertIn('Failed to get shard ranges', error_lines[0])
+ self.assertIn('KeyError', error_lines[0])
+ self.assertFalse(error_lines[1:])
+
+ def test_get_shard_ranges_invalid_shard_range(self):
+ sr = ShardRange('a/c', Timestamp.now())
+ bad_sr_data = dict(sr, name='bad_name')
+ error_lines = self._check_get_shard_ranges_bad_data(
+ json.dumps([bad_sr_data]))
+ self.assertIn('Failed to get shard ranges', error_lines[0])
+ self.assertIn('ValueError', error_lines[0])
+ self.assertFalse(error_lines[1:])
+
+ def test_get_shard_ranges_missing_record_type(self):
+ base = Controller(self.app)
+ req = Request.blank('/v1/a/c/o', method='PUT')
+ sr = ShardRange('a/c', Timestamp.now())
+ body = json.dumps([dict(sr)])
+ with mocked_http_conn(
+ 200, 200, body_iter=iter(['', body])):
+ actual = base._get_shard_ranges(req, 'a', 'c', '1_test')
+ self.assertIsNone(actual)
+ error_lines = self.app.logger.get_lines_for_level('error')
+ self.assertIn('Failed to get shard ranges', error_lines[0])
+ self.assertIn('unexpected record type', error_lines[0])
+ self.assertIn('/a/c', error_lines[0])
+ self.assertFalse(error_lines[1:])
+
+ def test_get_shard_ranges_wrong_record_type(self):
+ base = Controller(self.app)
+ req = Request.blank('/v1/a/c/o', method='PUT')
+ sr = ShardRange('a/c', Timestamp.now())
+ body = json.dumps([dict(sr)])
+ headers = {'X-Backend-Record-Type': 'object'}
+ with mocked_http_conn(
+ 200, 200, body_iter=iter(['', body]),
+ headers=headers):
+ actual = base._get_shard_ranges(req, 'a', 'c', '1_test')
+ self.assertIsNone(actual)
+ error_lines = self.app.logger.get_lines_for_level('error')
+ self.assertIn('Failed to get shard ranges', error_lines[0])
+ self.assertIn('unexpected record type', error_lines[0])
+ self.assertIn('/a/c', error_lines[0])
+ self.assertFalse(error_lines[1:])
+
+ def test_get_shard_ranges_request_failed(self):
+ base = Controller(self.app)
+ req = Request.blank('/v1/a/c/o', method='PUT')
+ with mocked_http_conn(200, 404, 404, 404):
+ actual = base._get_shard_ranges(req, 'a', 'c', '1_test')
+ self.assertIsNone(actual)
+ self.assertFalse(self.app.logger.get_lines_for_level('error'))
+ warning_lines = self.app.logger.get_lines_for_level('warning')
+ self.assertIn('Failed to get container listing', warning_lines[0])
+ self.assertIn('/a/c', warning_lines[0])
+ self.assertFalse(warning_lines[1:])
diff --git a/test/unit/proxy/controllers/test_container.py b/test/unit/proxy/controllers/test_container.py
index 03d53c2fde..ae44f8b001 100644
--- a/test/unit/proxy/controllers/test_container.py
+++ b/test/unit/proxy/controllers/test_container.py
@@ -12,17 +12,24 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import json
import mock
import socket
import unittest
from eventlet import Timeout
+from six.moves import urllib
+from swift.common.constraints import CONTAINER_LISTING_LIMIT
from swift.common.swob import Request
+from swift.common.utils import ShardRange, Timestamp
from swift.proxy import server as proxy_server
-from swift.proxy.controllers.base import headers_to_container_info, Controller
-from test.unit import fake_http_connect, FakeRing, FakeMemcache
+from swift.proxy.controllers.base import headers_to_container_info, Controller, \
+ get_container_info
+from test import annotate_failure
+from test.unit import fake_http_connect, FakeRing, FakeMemcache, \
+ make_timestamp_iter
from swift.common.storage_policy import StoragePolicy
from swift.common.request_helpers import get_sys_meta_prefix
@@ -72,6 +79,7 @@ class TestContainerController(TestRingBase):
new=FakeAccountInfoContainerController):
return _orig_get_controller(*args, **kwargs)
self.app.get_controller = wrapped_get_controller
+ self.ts_iter = make_timestamp_iter()
def _make_callback_func(self, context):
def callback(ipaddr, port, device, partition, method, path,
@@ -151,6 +159,91 @@ class TestContainerController(TestRingBase):
for key in owner_headers:
self.assertIn(key, resp.headers)
+ def test_reseller_admin(self):
+ reseller_internal_headers = {
+ get_sys_meta_prefix('container') + 'sharding': 'True'}
+ reseller_external_headers = {'x-container-sharding': 'on'}
+ controller = proxy_server.ContainerController(self.app, 'a', 'c')
+
+ # Normal users, even swift owners, can't set it
+ req = Request.blank('/v1/a/c', method='PUT',
+ headers=reseller_external_headers,
+ environ={'swift_owner': True})
+ with mocked_http_conn(*[201] * self.CONTAINER_REPLICAS) as mock_conn:
+ resp = req.get_response(self.app)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_internal_headers:
+ for captured in mock_conn.requests:
+ self.assertNotIn(key.title(), captured['headers'])
+
+ req = Request.blank('/v1/a/c', method='POST',
+ headers=reseller_external_headers,
+ environ={'swift_owner': True})
+ with mocked_http_conn(*[204] * self.CONTAINER_REPLICAS) as mock_conn:
+ resp = req.get_response(self.app)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_internal_headers:
+ for captured in mock_conn.requests:
+ self.assertNotIn(key.title(), captured['headers'])
+
+ req = Request.blank('/v1/a/c', environ={'swift_owner': True})
+ # Heck, they don't even get to know
+ with mock.patch('swift.proxy.controllers.base.http_connect',
+ fake_http_connect(200, 200,
+ headers=reseller_internal_headers)):
+ resp = controller.HEAD(req)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_external_headers:
+ self.assertNotIn(key, resp.headers)
+
+ with mock.patch('swift.proxy.controllers.base.http_connect',
+ fake_http_connect(200, 200,
+ headers=reseller_internal_headers)):
+ resp = controller.GET(req)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_external_headers:
+ self.assertNotIn(key, resp.headers)
+
+ # But reseller admins can set it
+ req = Request.blank('/v1/a/c', method='PUT',
+ headers=reseller_external_headers,
+ environ={'reseller_request': True})
+ with mocked_http_conn(*[201] * self.CONTAINER_REPLICAS) as mock_conn:
+ resp = req.get_response(self.app)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_internal_headers:
+ for captured in mock_conn.requests:
+ self.assertIn(key.title(), captured['headers'])
+
+ req = Request.blank('/v1/a/c', method='POST',
+ headers=reseller_external_headers,
+ environ={'reseller_request': True})
+ with mocked_http_conn(*[204] * self.CONTAINER_REPLICAS) as mock_conn:
+ resp = req.get_response(self.app)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_internal_headers:
+ for captured in mock_conn.requests:
+ self.assertIn(key.title(), captured['headers'])
+
+ # And see that they have
+ req = Request.blank('/v1/a/c', environ={'reseller_request': True})
+ with mock.patch('swift.proxy.controllers.base.http_connect',
+ fake_http_connect(200, 200,
+ headers=reseller_internal_headers)):
+ resp = controller.HEAD(req)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_external_headers:
+ self.assertIn(key, resp.headers)
+ self.assertEqual(resp.headers[key], 'True')
+
+ with mock.patch('swift.proxy.controllers.base.http_connect',
+ fake_http_connect(200, 200,
+ headers=reseller_internal_headers)):
+ resp = controller.GET(req)
+ self.assertEqual(2, resp.status_int // 100)
+ for key in reseller_external_headers:
+ self.assertEqual(resp.headers[key], 'True')
+
def test_sys_meta_headers_PUT(self):
# check that headers in sys meta namespace make it through
# the container controller
@@ -329,6 +422,852 @@ class TestContainerController(TestRingBase):
]
self._assert_responses('POST', POST_TEST_CASES)
+ def _make_shard_objects(self, shard_range):
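+ # generate one fake object per character name within the shard range,
+ # from just above its lower bound up to and including its upper bound
+ # ('A'..'z' when a bound is unset)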
+ lower = ord(shard_range.lower[0]) if shard_range.lower else ord('@')
+ upper = ord(shard_range.upper[0]) if shard_range.upper else ord('z')
+
+ objects = [{'name': chr(i), 'bytes': i, 'hash': 'hash%s' % chr(i),
+ 'content_type': 'text/plain', 'deleted': 0,
+ 'last_modified': next(self.ts_iter).isoformat}
+ for i in range(lower + 1, upper + 1)]
+ return objects
+
+ def _check_GET_shard_listing(self, mock_responses, expected_objects,
+ expected_requests, query_string='',
+ reverse=False):
+ # mock_responses is a list of tuples (status, json body, headers)
+ # expected_objects is a list of dicts
+ # expected_requests is a list of tuples (path, hdrs dict, params dict)
+
+ # sanity check that expected_objects is ordered by name with no repeats
+ def name(obj):
+ return obj.get('name', obj.get('subdir'))
+
+ for (prev, next_) in zip(expected_objects, expected_objects[1:]):
+ if reverse:
+ self.assertGreater(name(prev), name(next_))
+ else:
+ self.assertLess(name(prev), name(next_))
+ container_path = '/v1/a/c' + query_string
+ codes = (resp[0] for resp in mock_responses)
+ bodies = iter([json.dumps(resp[1]) for resp in mock_responses])
+ exp_headers = [resp[2] for resp in mock_responses]
+ request = Request.blank(container_path)
+ with mocked_http_conn(
+ *codes, body_iter=bodies, headers=exp_headers) as fake_conn:
+ resp = request.get_response(self.app)
+ for backend_req in fake_conn.requests:
+ self.assertEqual(request.headers['X-Trans-Id'],
+ backend_req['headers']['X-Trans-Id'])
+ self.assertTrue(backend_req['headers']['User-Agent'].startswith(
+ 'proxy-server'))
+ self.assertEqual(200, resp.status_int)
+ actual_objects = json.loads(resp.body)
+ self.assertEqual(len(expected_objects), len(actual_objects))
+ self.assertEqual(expected_objects, actual_objects)
+ self.assertEqual(len(expected_requests), len(fake_conn.requests))
+ for i, ((exp_path, exp_headers, exp_params), req) in enumerate(
+ zip(expected_requests, fake_conn.requests)):
+ with annotate_failure('Request check at index %d.' % i):
+ # strip off /sdx/0/ from path
+ self.assertEqual(exp_path, req['path'][7:])
+ self.assertEqual(
+ dict(exp_params, format='json'),
+ dict(urllib.parse.parse_qsl(req['qs'], True)))
+ for k, v in exp_headers.items():
+ self.assertIn(k, req['headers'])
+ self.assertEqual(v, req['headers'][k])
+ self.assertNotIn('X-Backend-Override-Delete', req['headers'])
+ return resp
+
+ def check_response(self, resp, root_resp_hdrs, expected_objects=None):
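+ # assert the listing response reflects the root container's sharding
+ # state, stats and user metadata, and that the cached container info
+ # agrees; expected_objects, if given, overrides the expected stats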
+ info_hdrs = dict(root_resp_hdrs)
+ if expected_objects is None:
+ # default is to expect whatever the root container sent
+ expected_obj_count = root_resp_hdrs['X-Container-Object-Count']
+ expected_bytes_used = root_resp_hdrs['X-Container-Bytes-Used']
+ else:
+ expected_bytes_used = sum([o['bytes'] for o in expected_objects])
+ expected_obj_count = len(expected_objects)
+ info_hdrs['X-Container-Bytes-Used'] = expected_bytes_used
+ info_hdrs['X-Container-Object-Count'] = expected_obj_count
+ self.assertEqual(expected_bytes_used,
+ int(resp.headers['X-Container-Bytes-Used']))
+ self.assertEqual(expected_obj_count,
+ int(resp.headers['X-Container-Object-Count']))
+ self.assertEqual('sharded', resp.headers['X-Backend-Sharding-State'])
+ for k, v in root_resp_hdrs.items():
+ if k.lower().startswith('x-container-meta'):
+ self.assertEqual(v, resp.headers[k])
+ # check that info cache is correct for root container
+ info = get_container_info(resp.request.environ, self.app)
+ self.assertEqual(headers_to_container_info(info_hdrs), info)
+
+ def test_GET_sharded_container(self):
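+ # verify that the proxy assembles a listing for a sharded root by
+ # fetching shard ranges from the root and then querying each shard in
+ # namespace order, honouring limit, marker, end_marker and reverse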
+ shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
+ for lower, upper in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+ shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(3)]
+
+ all_objects = []
+ for objects in sr_objs:
+ all_objects.extend(objects)
+ size_all_objects = sum([obj['bytes'] for obj in all_objects])
+ num_all_objects = len(all_objects)
+ limit = CONTAINER_LISTING_LIMIT
+ expected_objects = all_objects
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ # pretend root object stats are not yet updated
+ 'X-Container-Object-Count': num_all_objects - 1,
+ 'X-Container-Bytes-Used': size_all_objects - 1,
+ 'X-Container-Meta-Flavour': 'peach',
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ # GET all objects
+ # include some failed responses
+ mock_responses = [
+ # status, body, headers
+ (404, '', {}),
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, sr_objs[1], shard_resp_hdrs[1]),
+ (200, sr_objs[2], shard_resp_hdrs[2])
+ ]
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 404
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', limit=str(limit),
+ states='listing')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='p', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs,
+ expected_objects=expected_objects)
+
+ # GET all objects - sharding, final shard range points back to root
+ root_range = ShardRange('a/c', Timestamp.now(), 'pie', '')
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts[:2] + [dict(root_range)], root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, sr_objs[1], shard_resp_hdrs[1]),
+ (200, sr_objs[2], root_resp_hdrs)
+ ]
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', limit=str(limit),
+ states='listing')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))), # 200
+ (root_range.name, {'X-Backend-Record-Type': 'object'},
+ dict(marker='p', end_marker='',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs,
+ expected_objects=expected_objects)
+
+ # GET all objects in reverse
+ mock_responses = [
+ # status, body, headers
+ (200, list(reversed(sr_dicts)), root_shard_resp_hdrs),
+ (200, list(reversed(sr_objs[2])), shard_resp_hdrs[2]),
+ (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]),
+ (200, list(reversed(sr_objs[0])), shard_resp_hdrs[0]),
+ ]
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing', reverse='true')),
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='pie', reverse='true',
+ limit=str(limit), states='listing')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='q', end_marker='ham', states='listing',
+ reverse='true', limit=str(limit - len(sr_objs[2])))), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='i', end_marker='', states='listing', reverse='true',
+ limit=str(limit - len(sr_objs[2] + sr_objs[1])))), # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, list(reversed(expected_objects)),
+ expected_requests, query_string='?reverse=true', reverse=True)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs,
+ expected_objects=expected_objects)
+
+ # GET with limit param
+ limit = len(sr_objs[0]) + len(sr_objs[1]) + 1
+ expected_objects = all_objects[:limit]
+ mock_responses = [
+ (404, '', {}),
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, sr_objs[1], shard_resp_hdrs[1]),
+ (200, sr_objs[2][:1], shard_resp_hdrs[2])
+ ]
+ expected_requests = [
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(limit=str(limit), states='listing')), # 404
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(limit=str(limit), states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit))),
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))),
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker='p', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1]))))
+ ]
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?limit=%s' % limit)
+ self.check_response(resp, root_resp_hdrs)
+
+ # GET with marker
+ marker = sr_objs[1][2]['name']
+ first_included = len(sr_objs[0]) + 2
+ limit = CONTAINER_LISTING_LIMIT
+ expected_objects = all_objects[first_included:]
+ mock_responses = [
+ (404, '', {}),
+ (200, sr_dicts[1:], root_shard_resp_hdrs),
+ (404, '', {}),
+ (200, sr_objs[1][2:], shard_resp_hdrs[1]),
+ (200, sr_objs[2], shard_resp_hdrs[2])
+ ]
+ expected_requests = [
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(marker=marker, states='listing')), # 404
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(marker=marker, states='listing')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 404
+ dict(marker=marker, end_marker='pie\x00', states='listing',
+ limit=str(limit))),
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker=marker, end_marker='pie\x00', states='listing',
+ limit=str(limit))),
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker='p', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[1][2:])))),
+ ]
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?marker=%s' % marker)
+ self.check_response(resp, root_resp_hdrs)
+
+ # GET with end marker
+ end_marker = sr_objs[1][6]['name']
+ first_excluded = len(sr_objs[0]) + 6
+ expected_objects = all_objects[:first_excluded]
+ mock_responses = [
+ (404, '', {}),
+ (200, sr_dicts[:2], root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (404, '', {}),
+ (200, sr_objs[1][:6], shard_resp_hdrs[1])
+ ]
+ expected_requests = [
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(end_marker=end_marker, states='listing')), # 404
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(end_marker=end_marker, states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit))),
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 404
+ dict(marker='h', end_marker=end_marker, states='listing',
+ limit=str(limit - len(sr_objs[0])))),
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker='h', end_marker=end_marker, states='listing',
+ limit=str(limit - len(sr_objs[0])))),
+ ]
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?end_marker=%s' % end_marker)
+ self.check_response(resp, root_resp_hdrs)
+
+ # marker and end_marker and limit
+ limit = 2
+ expected_objects = all_objects[first_included:first_excluded]
+ mock_responses = [
+ (200, sr_dicts[1:2], root_shard_resp_hdrs),
+ (200, sr_objs[1][2:6], shard_resp_hdrs[1])
+ ]
+ expected_requests = [
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing', limit=str(limit),
+ marker=marker, end_marker=end_marker)), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker=marker, end_marker=end_marker, states='listing',
+ limit=str(limit))),
+ ]
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?marker=%s&end_marker=%s&limit=%s'
+ % (marker, end_marker, limit))
+ self.check_response(resp, root_resp_hdrs)
+
+ # reverse with marker, end_marker
+ expected_objects.reverse()
+ mock_responses = [
+ (200, sr_dicts[1:2], root_shard_resp_hdrs),
+ (200, list(reversed(sr_objs[1][2:6])), shard_resp_hdrs[1])
+ ]
+ expected_requests = [
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(marker=end_marker, reverse='true', end_marker=marker,
+ limit=str(limit), states='listing',)), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200
+ dict(marker=end_marker, end_marker=marker, states='listing',
+ limit=str(limit), reverse='true')),
+ ]
+ self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?marker=%s&end_marker=%s&limit=%s&reverse=true'
+ % (end_marker, marker, limit), reverse=True)
+ self.check_response(resp, root_resp_hdrs)
+
+ def test_GET_sharded_container_with_delimiter(self):
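+ # verify that the delimiter param is passed on to each shard and that
+ # a subdir returned by one shard advances the marker for the next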
+ shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
+ for lower, upper in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ shard_resp_hdrs = {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': 2,
+ 'X-Container-Bytes-Used': 4,
+ 'X-Backend-Storage-Policy-Index': 0}
+
+ limit = CONTAINER_LISTING_LIMIT
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ # pretend root object stats are not yet updated
+ 'X-Container-Object-Count': 6,
+ 'X-Container-Bytes-Used': 12,
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ sr_0_obj = {'name': 'apple',
+ 'bytes': 1,
+ 'hash': 'hash',
+ 'content_type': 'text/plain',
+ 'deleted': 0,
+ 'last_modified': next(self.ts_iter).isoformat}
+ sr_2_obj = {'name': 'pumpkin',
+ 'bytes': 1,
+ 'hash': 'hash',
+ 'content_type': 'text/plain',
+ 'deleted': 0,
+ 'last_modified': next(self.ts_iter).isoformat}
+ subdir = {'subdir': 'ha/'}
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, [sr_0_obj, subdir], shard_resp_hdrs),
+ (200, [], shard_resp_hdrs),
+ (200, [sr_2_obj], shard_resp_hdrs)
+ ]
+ expected_requests = [
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing', delimiter='/')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', limit=str(limit),
+ states='listing', delimiter='/')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='ha/', end_marker='pie\x00', states='listing',
+ limit=str(limit - 2), delimiter='/')), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='ha/', end_marker='', states='listing',
+ limit=str(limit - 2), delimiter='/')) # 200
+ ]
+
+ expected_objects = [sr_0_obj, subdir, sr_2_obj]
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?delimiter=/')
+ self.check_response(resp, root_resp_hdrs)
+
+ def test_GET_sharded_container_overlapping_shards(self):
+ # verify ordered listing even if unexpected overlapping shard ranges
+ shard_bounds = (('', 'ham', ShardRange.CLEAVED),
+ ('', 'pie', ShardRange.ACTIVE),
+ ('lemon', '', ShardRange.ACTIVE))
+ shard_ranges = [
+ ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper,
+ state=state)
+ for lower, upper, state in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+ shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(3)]
+
+ all_objects = []
+ for objects in sr_objs:
+ all_objects.extend(objects)
+ size_all_objects = sum([obj['bytes'] for obj in all_objects])
+ num_all_objects = len(all_objects)
+ limit = CONTAINER_LISTING_LIMIT
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ # pretend root object stats are not yet updated
+ 'X-Container-Object-Count': num_all_objects - 1,
+ 'X-Container-Bytes-Used': size_all_objects - 1,
+ 'X-Container-Meta-Flavour': 'peach',
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ # forwards listing
+
+ # expect subset of second shard range
+ objs_1 = [o for o in sr_objs[1] if o['name'] > sr_objs[0][-1]['name']]
+ # expect subset of third shard range
+ objs_2 = [o for o in sr_objs[2] if o['name'] > sr_objs[1][-1]['name']]
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, objs_1, shard_resp_hdrs[1]),
+ (200, objs_2, shard_resp_hdrs[2])
+ ]
+ # NB marker always advances to last object name
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit))), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='p', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + objs_1)))) # 200
+ ]
+
+ expected_objects = sr_objs[0] + objs_1 + objs_2
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs,
+ expected_objects=expected_objects)
+
+ # reverse listing
+
+ # expect subset of first shard range
+ objs_0 = [o for o in sr_objs[0] if o['name'] < sr_objs[1][0]['name']]
+ # expect subset of second shard range
+ objs_1 = [o for o in sr_objs[1] if o['name'] < sr_objs[2][0]['name']]
+ mock_responses = [
+ # status, body, headers
+ (200, list(reversed(sr_dicts)), root_shard_resp_hdrs),
+ (200, list(reversed(sr_objs[2])), shard_resp_hdrs[2]),
+ (200, list(reversed(objs_1)), shard_resp_hdrs[1]),
+ (200, list(reversed(objs_0)), shard_resp_hdrs[0]),
+ ]
+ # NB marker always advances to last object name
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing', reverse='true')), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='lemon', states='listing',
+ limit=str(limit),
+ reverse='true')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='m', end_marker='', reverse='true', states='listing',
+ limit=str(limit - len(sr_objs[2])))), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='A', end_marker='', reverse='true', states='listing',
+ limit=str(limit - len(sr_objs[2] + objs_1)))) # 200
+ ]
+
+ expected_objects = list(reversed(objs_0 + objs_1 + sr_objs[2]))
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests,
+ query_string='?reverse=true', reverse=True)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs,
+ expected_objects=expected_objects)
+
+ def test_GET_sharded_container_gap_in_shards(self):
+ # verify ordered listing even if unexpected gap between shard ranges
+ shard_bounds = (('', 'ham'), ('onion', 'pie'), ('rhubarb', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper)
+ for lower, upper in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+ shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(3)]
+
+ all_objects = []
+ for objects in sr_objs:
+ all_objects.extend(objects)
+ size_all_objects = sum([obj['bytes'] for obj in all_objects])
+ num_all_objects = len(all_objects)
+ limit = CONTAINER_LISTING_LIMIT
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ 'X-Container-Object-Count': num_all_objects,
+ 'X-Container-Bytes-Used': size_all_objects,
+ 'X-Container-Meta-Flavour': 'peach',
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, sr_objs[1], shard_resp_hdrs[1]),
+ (200, sr_objs[2], shard_resp_hdrs[2])
+ ]
+ # NB marker always advances to last object name
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit))), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='p', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, all_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs)
+
+ def test_GET_sharded_container_empty_shard(self):
+ # verify ordered listing when a shard is empty
+ shard_bounds = (('', 'ham'), ('ham', 'pie'), ('lemon', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
+ for lower, upper in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+ # empty second shard range
+ sr_objs[1] = []
+ shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(3)]
+
+ all_objects = []
+ for objects in sr_objs:
+ all_objects.extend(objects)
+ size_all_objects = sum([obj['bytes'] for obj in all_objects])
+ num_all_objects = len(all_objects)
+ limit = CONTAINER_LISTING_LIMIT
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ 'X-Container-Object-Count': num_all_objects,
+ 'X-Container-Bytes-Used': size_all_objects,
+ 'X-Container-Meta-Flavour': 'peach',
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, sr_objs[1], shard_resp_hdrs[1]),
+ (200, sr_objs[2], shard_resp_hdrs[2])
+ ]
+ # NB marker always advances to last object name
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit))), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, all_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs)
+
+ # marker in empty second range
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts[1:], root_shard_resp_hdrs),
+ (200, sr_objs[1], shard_resp_hdrs[1]),
+ (200, sr_objs[2], shard_resp_hdrs[2])
+ ]
+ # NB marker unchanged when getting from third range
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing', marker='koolaid')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='koolaid', end_marker='pie\x00', states='listing',
+ limit=str(limit))), # 200
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='koolaid', end_marker='', states='listing',
+ limit=str(limit))) # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, sr_objs[2], expected_requests,
+ query_string='?marker=koolaid')
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs)
+
+ # marker in empty second range, reverse
+ mock_responses = [
+ # status, body, headers
+ (200, list(reversed(sr_dicts[:2])), root_shard_resp_hdrs),
+ (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]),
+ (200, list(reversed(sr_objs[0])), shard_resp_hdrs[0])
+ ]
+ # NB marker unchanged when getting from first range
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing', marker='koolaid', reverse='true')), # 200
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='koolaid', end_marker='ham', reverse='true',
+ states='listing', limit=str(limit))), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='koolaid', end_marker='', reverse='true',
+ states='listing', limit=str(limit))) # 200
+ ]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, list(reversed(sr_objs[0])), expected_requests,
+ query_string='?marker=koolaid&reverse=true', reverse=True)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs)
+
+ def _check_GET_sharded_container_shard_error(self, error):
+ # verify that an ordered listing is still returned when one shard
+ # (here an empty one) persistently returns an error
+ shard_bounds = (('', 'ham'), ('ham', 'pie'), ('lemon', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
+ for lower, upper in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+ # empty second shard range
+ sr_objs[1] = []
+ shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(3)]
+
+ all_objects = []
+ for objects in sr_objs:
+ all_objects.extend(objects)
+ size_all_objects = sum([obj['bytes'] for obj in all_objects])
+ num_all_objects = len(all_objects)
+ limit = CONTAINER_LISTING_LIMIT
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ 'X-Container-Object-Count': num_all_objects,
+ 'X-Container-Bytes-Used': size_all_objects,
+ 'X-Container-Meta-Flavour': 'peach',
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0])] + \
+ [(error, [], {})] * 2 * self.CONTAINER_REPLICAS + \
+ [(200, sr_objs[2], shard_resp_hdrs[2])]
+
+ # NB marker always advances to last object name
+ expected_requests = [
+ # path, headers, params
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit)))] \
+ + [(shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0]))))
+ ] * 2 * self.CONTAINER_REPLICAS \
+ + [(shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1]))))]
+
+ resp = self._check_GET_shard_listing(
+ mock_responses, all_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs)
+
+ def test_GET_sharded_container_shard_errors(self):
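+ # exercise the shard error handling for both 404 and 500 responses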
+ self._check_GET_sharded_container_shard_error(404)
+ self._check_GET_sharded_container_shard_error(500)
+
+ def test_GET_sharded_container_sharding_shard(self):
+ # one shard is in process of sharding
+ shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', ''))
+ shard_ranges = [
+ ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper)
+ for lower, upper in shard_bounds]
+ sr_dicts = [dict(sr) for sr in shard_ranges]
+ sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+ shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(3)]
+ shard_1_shard_resp_hdrs = dict(shard_resp_hdrs[1])
+ shard_1_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ # second shard is sharding and has cleaved two out of three sub shards
+ shard_resp_hdrs[1]['X-Backend-Sharding-State'] = 'sharding'
+ sub_shard_bounds = (('ham', 'juice'), ('juice', 'lemon'))
+ sub_shard_ranges = [
+ ShardRange('a/c_sub_' + upper, Timestamp.now(), lower, upper)
+ for lower, upper in sub_shard_bounds]
+ sub_sr_dicts = [dict(sr) for sr in sub_shard_ranges]
+ sub_sr_objs = [self._make_shard_objects(sr) for sr in sub_shard_ranges]
+ sub_shard_resp_hdrs = [
+ {'X-Backend-Sharding-State': 'unsharded',
+ 'X-Container-Object-Count': len(sub_sr_objs[i]),
+ 'X-Container-Bytes-Used':
+ sum([obj['bytes'] for obj in sub_sr_objs[i]]),
+ 'X-Container-Meta-Flavour': 'flavour%d' % i,
+ 'X-Backend-Storage-Policy-Index': 0}
+ for i in range(2)]
+
+ all_objects = []
+ for objects in sr_objs:
+ all_objects.extend(objects)
+ size_all_objects = sum([obj['bytes'] for obj in all_objects])
+ num_all_objects = len(all_objects)
+ limit = CONTAINER_LISTING_LIMIT
+ root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+ 'X-Container-Object-Count': num_all_objects,
+ 'X-Container-Bytes-Used': size_all_objects,
+ 'X-Container-Meta-Flavour': 'peach',
+ 'X-Backend-Storage-Policy-Index': 0}
+ root_shard_resp_hdrs = dict(root_resp_hdrs)
+ root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+ mock_responses = [
+ # status, body, headers
+ (200, sr_dicts, root_shard_resp_hdrs),
+ (200, sr_objs[0], shard_resp_hdrs[0]),
+ (200, sub_sr_dicts + [sr_dicts[1]], shard_1_shard_resp_hdrs),
+ (200, sub_sr_objs[0], sub_shard_resp_hdrs[0]),
+ (200, sub_sr_objs[1], sub_shard_resp_hdrs[1]),
+ (200, sr_objs[1][len(sub_sr_objs[0] + sub_sr_objs[1]):],
+ shard_resp_hdrs[1]),
+ (200, sr_objs[2], shard_resp_hdrs[2])
+ ]
+ # NB marker always advances to last object name
+ expected_requests = [
+ # get root shard ranges
+ ('a/c', {'X-Backend-Record-Type': 'auto'},
+ dict(states='listing')), # 200
+ # get first shard objects
+ (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='', end_marker='ham\x00', states='listing',
+ limit=str(limit))), # 200
+ # get second shard sub-shard ranges
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='pie\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))),
+ # get first sub-shard objects
+ (sub_shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='h', end_marker='juice\x00', states='listing',
+ limit=str(limit - len(sr_objs[0])))),
+ # get second sub-shard objects
+ (sub_shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='j', end_marker='lemon\x00', states='listing',
+ limit=str(limit - len(sr_objs[0] + sub_sr_objs[0])))),
+ # get remainder of second shard objects
+ (shard_ranges[1].name, {'X-Backend-Record-Type': 'object'},
+ dict(marker='l', end_marker='pie\x00',
+ limit=str(limit - len(sr_objs[0] + sub_sr_objs[0] +
+ sub_sr_objs[1])))), # 200
+ # get third shard objects
+ (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+ dict(marker='p', end_marker='', states='listing',
+ limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200
+ ]
+ expected_objects = (
+ sr_objs[0] + sub_sr_objs[0] + sub_sr_objs[1] +
+ sr_objs[1][len(sub_sr_objs[0] + sub_sr_objs[1]):] + sr_objs[2])
+ resp = self._check_GET_shard_listing(
+ mock_responses, expected_objects, expected_requests)
+ # root object count will be overridden by actual length of listing
+ self.check_response(resp, root_resp_hdrs)
+
@patch_policies(
[StoragePolicy(0, 'zero', True, object_ring=FakeRing(replicas=4))])
diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py
index bee74c380a..8e67abb009 100644
--- a/test/unit/proxy/test_server.py
+++ b/test/unit/proxy/test_server.py
@@ -47,7 +47,7 @@ from eventlet.green import httplib
from six import BytesIO
from six import StringIO
from six.moves import range
-from six.moves.urllib.parse import quote
+from six.moves.urllib.parse import quote, parse_qsl
from test import listen_zero
from test.unit import (
@@ -3222,95 +3222,197 @@ class TestReplicatedObjectController(
# reset the router post patch_policies
self.app.obj_controller_router = proxy_server.ObjectControllerRouter()
self.app.sort_nodes = lambda nodes, *args, **kwargs: nodes
- backend_requests = []
- def capture_requests(ip, port, method, path, headers, *args,
- **kwargs):
- backend_requests.append((method, path, headers))
+ def do_test(resp_headers):
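+ # run the POST scenarios (with and without a policy override) against
+ # the given container response headers; the backend requests should
+ # never include an X-Backend-Container-Path header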
+ self.app.memcache.store = {}
+ backend_requests = []
- req = Request.blank('/v1/a/c/o', {}, method='POST',
- headers={'X-Object-Meta-Color': 'Blue',
- 'Content-Type': 'text/plain'})
+ def capture_requests(ip, port, method, path, headers, *args,
+ **kwargs):
+ backend_requests.append((method, path, headers))
- # we want the container_info response to says a policy index of 1
- resp_headers = {'X-Backend-Storage-Policy-Index': 1}
- with mocked_http_conn(
- 200, 200, 202, 202, 202,
- headers=resp_headers, give_connect=capture_requests
- ) as fake_conn:
- resp = req.get_response(self.app)
- self.assertRaises(StopIteration, fake_conn.code_iter.next)
+ req = Request.blank('/v1/a/c/o', {}, method='POST',
+ headers={'X-Object-Meta-Color': 'Blue',
+ 'Content-Type': 'text/plain'})
- self.assertEqual(resp.status_int, 202)
- self.assertEqual(len(backend_requests), 5)
+ # we want the container_info response to say a policy index of 1
+ with mocked_http_conn(
+ 200, 200, 202, 202, 202,
+ headers=resp_headers, give_connect=capture_requests
+ ) as fake_conn:
+ resp = req.get_response(self.app)
+ self.assertRaises(StopIteration, fake_conn.code_iter.next)
- def check_request(req, method, path, headers=None):
- req_method, req_path, req_headers = req
- self.assertEqual(method, req_method)
- # caller can ignore leading path parts
- self.assertTrue(req_path.endswith(path),
- 'expected path to end with %s, it was %s' % (
- path, req_path))
- headers = headers or {}
- # caller can ignore some headers
- for k, v in headers.items():
- self.assertEqual(req_headers[k], v)
- account_request = backend_requests.pop(0)
- check_request(account_request, method='HEAD', path='/sda/0/a')
- container_request = backend_requests.pop(0)
- check_request(container_request, method='HEAD', path='/sda/0/a/c')
- # make sure backend requests included expected container headers
- container_headers = {}
- for request in backend_requests:
- req_headers = request[2]
- device = req_headers['x-container-device']
- host = req_headers['x-container-host']
- container_headers[device] = host
- expectations = {
- 'method': 'POST',
- 'path': '/0/a/c/o',
- 'headers': {
- 'X-Container-Partition': '0',
- 'Connection': 'close',
- 'User-Agent': 'proxy-server %s' % os.getpid(),
- 'Host': 'localhost:80',
- 'Referer': 'POST http://localhost/v1/a/c/o',
- 'X-Object-Meta-Color': 'Blue',
- 'X-Backend-Storage-Policy-Index': '1'
- },
- }
- check_request(request, **expectations)
+ self.assertEqual(resp.status_int, 202)
+ self.assertEqual(len(backend_requests), 5)
- expected = {}
- for i, device in enumerate(['sda', 'sdb', 'sdc']):
- expected[device] = '10.0.0.%d:100%d' % (i, i)
- self.assertEqual(container_headers, expected)
+ def check_request(req, method, path, headers=None):
+ req_method, req_path, req_headers = req
+ self.assertEqual(method, req_method)
+ # caller can ignore leading path parts
+ self.assertTrue(req_path.endswith(path),
+ 'expected path to end with %s, it was %s' % (
+ path, req_path))
+ headers = headers or {}
+ # caller can ignore some headers
+ for k, v in headers.items():
+ self.assertEqual(req_headers[k], v)
+ self.assertNotIn('X-Backend-Container-Path', req_headers)
- # and again with policy override
- self.app.memcache.store = {}
- backend_requests = []
- req = Request.blank('/v1/a/c/o', {}, method='POST',
- headers={'X-Object-Meta-Color': 'Blue',
- 'Content-Type': 'text/plain',
- 'X-Backend-Storage-Policy-Index': 0})
- with mocked_http_conn(
- 200, 200, 202, 202, 202,
- headers=resp_headers, give_connect=capture_requests
- ) as fake_conn:
- resp = req.get_response(self.app)
- self.assertRaises(StopIteration, fake_conn.code_iter.next)
- self.assertEqual(resp.status_int, 202)
- self.assertEqual(len(backend_requests), 5)
- for request in backend_requests[2:]:
- expectations = {
- 'method': 'POST',
- 'path': '/0/a/c/o', # ignore device bit
- 'headers': {
- 'X-Object-Meta-Color': 'Blue',
- 'X-Backend-Storage-Policy-Index': '0',
+ account_request = backend_requests.pop(0)
+ check_request(account_request, method='HEAD', path='/sda/0/a')
+ container_request = backend_requests.pop(0)
+ check_request(container_request, method='HEAD', path='/sda/0/a/c')
+ # make sure backend requests included expected container headers
+ container_headers = {}
+ for request in backend_requests:
+ req_headers = request[2]
+ device = req_headers['x-container-device']
+ host = req_headers['x-container-host']
+ container_headers[device] = host
+ expectations = {
+ 'method': 'POST',
+ 'path': '/0/a/c/o',
+ 'headers': {
+ 'X-Container-Partition': '0',
+ 'Connection': 'close',
+ 'User-Agent': 'proxy-server %s' % os.getpid(),
+ 'Host': 'localhost:80',
+ 'Referer': 'POST http://localhost/v1/a/c/o',
+ 'X-Object-Meta-Color': 'Blue',
+ 'X-Backend-Storage-Policy-Index': '1'
+ },
}
- }
- check_request(request, **expectations)
+ check_request(request, **expectations)
+
+ expected = {}
+ for i, device in enumerate(['sda', 'sdb', 'sdc']):
+ expected[device] = '10.0.0.%d:100%d' % (i, i)
+ self.assertEqual(container_headers, expected)
+
+ # and again with policy override
+ self.app.memcache.store = {}
+ backend_requests = []
+ req = Request.blank('/v1/a/c/o', {}, method='POST',
+ headers={'X-Object-Meta-Color': 'Blue',
+ 'Content-Type': 'text/plain',
+ 'X-Backend-Storage-Policy-Index': 0})
+ with mocked_http_conn(
+ 200, 200, 202, 202, 202,
+ headers=resp_headers, give_connect=capture_requests
+ ) as fake_conn:
+ resp = req.get_response(self.app)
+ self.assertRaises(StopIteration, fake_conn.code_iter.next)
+ self.assertEqual(resp.status_int, 202)
+ self.assertEqual(len(backend_requests), 5)
+ for request in backend_requests[2:]:
+ expectations = {
+ 'method': 'POST',
+ 'path': '/0/a/c/o', # ignore device bit
+ 'headers': {
+ 'X-Object-Meta-Color': 'Blue',
+ 'X-Backend-Storage-Policy-Index': '0',
+ }
+ }
+ check_request(request, **expectations)
+
+ resp_headers = {'X-Backend-Storage-Policy-Index': 1}
+ do_test(resp_headers)
+ resp_headers['X-Backend-Sharding-State'] = 'unsharded'
+ do_test(resp_headers)
+
+ @patch_policies([
+ StoragePolicy(0, 'zero', is_default=True, object_ring=FakeRing()),
+ StoragePolicy(1, 'one', object_ring=FakeRing()),
+ ])
+ def test_backend_headers_update_shard_container(self):
+ # verify that when the container is sharded the backend container update is
+ # directed to the shard container
+ # reset the router post patch_policies
+ self.app.obj_controller_router = proxy_server.ObjectControllerRouter()
+ self.app.sort_nodes = lambda nodes, *args, **kwargs: nodes
+
+ def do_test(method, sharding_state):
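+ # send an object request to a container reporting the given sharding
+ # state; the proxy should GET the shard range covering the object name
+ # and direct the container update to it via X-Backend-Container-Path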
+ self.app.memcache.store = {}
+ req = Request.blank('/v1/a/c/o', {}, method=method, body='',
+ headers={'Content-Type': 'text/plain'})
+
+ # we want the container_info response to say a policy index of 1 and
+ # sharding state
+ # acc HEAD, cont HEAD, cont shard GET, obj POSTs
+ status_codes = (200, 200, 200, 202, 202, 202)
+ resp_headers = {'X-Backend-Storage-Policy-Index': 1,
+ 'x-backend-sharding-state': sharding_state,
+ 'X-Backend-Record-Type': 'shard'}
+ shard_range = utils.ShardRange(
+ '.shards_a/c_shard', utils.Timestamp.now(), 'l', 'u')
+ body = json.dumps([dict(shard_range)])
+ with mocked_http_conn(*status_codes, headers=resp_headers,
+ body=body) as fake_conn:
+ resp = req.get_response(self.app)
+
+ self.assertEqual(resp.status_int, 202)
+ backend_requests = fake_conn.requests
+
+ def check_request(req, method, path, headers=None, params=None):
+ self.assertEqual(method, req['method'])
+ # caller can ignore leading path parts
+ self.assertTrue(req['path'].endswith(path),
+ 'expected path to end with %s, it was %s' % (
+ path, req['path']))
+ headers = headers or {}
+ # caller can ignore some headers
+ for k, v in headers.items():
+ self.assertEqual(req['headers'][k], v,
+ 'Expected %s but got %s for key %s' %
+ (v, req['headers'][k], k))
+ params = params or {}
+ req_params = dict(parse_qsl(req['qs'])) if req['qs'] else {}
+ for k, v in params.items():
+ self.assertEqual(req_params[k], v,
+ 'Expected %s but got %s for key %s' %
+ (v, req_params[k], k))
+
+ account_request = backend_requests[0]
+ check_request(account_request, method='HEAD', path='/sda/0/a')
+ container_request = backend_requests[1]
+ check_request(container_request, method='HEAD', path='/sda/0/a/c')
+ container_request_shard = backend_requests[2]
+ check_request(
+ container_request_shard, method='GET', path='/sda/0/a/c',
+ params={'includes': 'o'})
+
+ # make sure backend requests included expected container headers
+ container_headers = {}
+
+ for request in backend_requests[3:]:
+ req_headers = request['headers']
+ device = req_headers['x-container-device']
+ container_headers[device] = req_headers['x-container-host']
+ expectations = {
+ 'method': method,
+ 'path': '/0/a/c/o',
+ 'headers': {
+ 'X-Container-Partition': '0',
+ 'Host': 'localhost:80',
+ 'Referer': '%s http://localhost/v1/a/c/o' % method,
+ 'X-Backend-Storage-Policy-Index': '1',
+ 'X-Backend-Container-Path': shard_range.name
+ },
+ }
+ check_request(request, **expectations)
+
+ expected = {}
+ for i, device in enumerate(['sda', 'sdb', 'sdc']):
+ expected[device] = '10.0.0.%d:100%d' % (i, i)
+ self.assertEqual(container_headers, expected)
+
+ do_test('POST', 'sharding')
+ do_test('POST', 'sharded')
+ do_test('DELETE', 'sharding')
+ do_test('DELETE', 'sharded')
+ do_test('PUT', 'sharding')
+ do_test('PUT', 'sharded')
def test_DELETE(self):
with save_globals():
@@ -8356,6 +8458,29 @@ class TestContainerController(unittest.TestCase):
self.assertEqual(res.content_length, 0)
self.assertNotIn('transfer-encoding', res.headers)
+ def test_GET_account_non_existent(self):
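+ # a container GET for a non-existent account returns 404 and the
+ # result is not added to the info cache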
+ with save_globals():
+ set_http_connect(404, 404, 404)
+ controller = proxy_server.ContainerController(self.app, 'a', 'c')
+ req = Request.blank('/v1/a/c')
+ self.app.update_request(req)
+ res = controller.GET(req)
+ self.assertEqual(res.status_int, 404)
+ self.assertNotIn('container/a/c', res.environ['swift.infocache'])
+
+ def test_GET_auto_create_prefix_account_non_existent(self):
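+ # under an auto-create account the container GET succeeds despite the
+ # account 404s and the 204 result is cached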
+ with save_globals():
+ set_http_connect(404, 404, 404, 204, 204, 204)
+ controller = proxy_server.ContainerController(self.app, '.a', 'c')
+ req = Request.blank('/v1/a/c')
+ self.app.update_request(req)
+ res = controller.GET(req)
+ self.assertEqual(res.status_int, 204)
+ ic = res.environ['swift.infocache']
+ self.assertEqual(ic['container/.a/c']['status'], 204)
+ self.assertEqual(res.content_length, 0)
+ self.assertNotIn('transfer-encoding', res.headers)
+
def test_GET_calls_authorize(self):
called = [False]