Browse Source

Merge remote-tracking branch 'remotes/origin/master' into merge-master

Change-Id: I87cf4285d1d79aa86f690faa32f4b86076080f7a
changes/38/517838/1
Kota Tsuyuzaki 3 years ago
parent
commit
76917dfb1d
46 changed files with 1914 additions and 763 deletions
  1. +182
    -0
      doc/manpages/container-reconciler.conf.5
  2. +6
    -5
      doc/manpages/object-server.conf.5
  3. +21
    -8
      doc/source/api/large_objects.rst
  4. +113
    -113
      doc/source/deployment_guide.rst
  5. +55
    -55
      doc/source/overview_policies.rst
  6. +6
    -5
      etc/object-server.conf-sample
  7. +5
    -0
      etc/proxy-server.conf-sample
  8. +4
    -4
      swift/account/reaper.py
  9. +5
    -4
      swift/common/db_replicator.py
  10. +32
    -24
      swift/common/middleware/bulk.py
  11. +154
    -56
      swift/common/middleware/slo.py
  12. +4
    -4
      swift/common/ring/builder.py
  13. +10
    -6
      swift/common/ring/composite_builder.py
  14. +2
    -6
      swift/common/storage_policy.py
  15. +46
    -8
      swift/common/utils.py
  16. +6
    -5
      swift/container/updater.py
  17. +2
    -2
      swift/obj/auditor.py
  18. +38
    -16
      swift/obj/diskfile.py
  19. +10
    -7
      swift/obj/replicator.py
  20. +4
    -6
      swift/obj/updater.py
  21. +49
    -22
      swift/proxy/controllers/base.py
  22. +2
    -0
      test/functional/swift_test_client.py
  23. +95
    -0
      test/functional/test_slo.py
  24. +1
    -1
      test/probe/common.py
  25. +63
    -0
      test/probe/test_object_handoff.py
  26. +3
    -2
      test/probe/test_replication_servers_working.py
  27. +6
    -8
      test/unit/account/test_reaper.py
  28. +6
    -2
      test/unit/common/middleware/test_bulk.py
  29. +27
    -76
      test/unit/common/middleware/test_memcache.py
  30. +249
    -3
      test/unit/common/middleware/test_slo.py
  31. +23
    -0
      test/unit/common/ring/test_builder.py
  32. +226
    -90
      test/unit/common/ring/test_composite_builder.py
  33. +3
    -11
      test/unit/common/ring/test_ring.py
  34. +2
    -5
      test/unit/common/test_bufferedhttp.py
  35. +106
    -63
      test/unit/common/test_db_replicator.py
  36. +34
    -8
      test/unit/common/test_utils.py
  37. +5
    -5
      test/unit/container/test_replicator.py
  38. +16
    -18
      test/unit/container/test_server.py
  39. +9
    -13
      test/unit/container/test_updater.py
  40. +10
    -11
      test/unit/obj/test_auditor.py
  41. +195
    -59
      test/unit/obj/test_diskfile.py
  42. +15
    -1
      test/unit/obj/test_replicator.py
  43. +1
    -1
      test/unit/obj/test_ssync.py
  44. +1
    -1
      test/unit/obj/test_ssync_receiver.py
  45. +33
    -29
      test/unit/obj/test_updater.py
  46. +29
    -0
      test/unit/proxy/controllers/test_obj.py

+ 182
- 0
doc/manpages/container-reconciler.conf.5 View File

@ -0,0 +1,182 @@
.\"
.\" Author: HCLTech-SSW <hcl_ss_oss@hcl.com>
.\" Copyright (c) 2010-2017 OpenStack Foundation.
.\"
.\" Licensed under the Apache License, Version 2.0 (the "License");
.\" you may not use this file except in compliance with the License.
.\" You may obtain a copy of the License at
.\"
.\" http://www.apache.org/licenses/LICENSE-2.0
.\"
.\" Unless required by applicable law or agreed to in writing, software
.\" distributed under the License is distributed on an "AS IS" BASIS,
.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
.\" implied.
.\" See the License for the specific language governing permissions and
.\" limitations under the License.
.\"
.TH container-reconciler.conf 5 "10/25/2017" "Linux" "OpenStack Swift"
.SH NAME
.LP
.B container-reconciler.conf
\- configuration file for the OpenStack Swift container reconciler
.SH SYNOPSIS
.LP
.B container-reconciler.conf
.SH DESCRIPTION
.PP
This is the configuration file used by the container reconciler.
The configuration file follows the python-pastedeploy syntax. The file is divided
into sections, which are enclosed by square brackets. Each section will contain a
certain number of key/value parameters which are described later.
Any line that begins with a '#' symbol is ignored.
You can find more information about python-pastedeploy configuration format at
\fIhttp://pythonpaste.org/deploy/#config-format\fR
.SH GLOBAL SECTION
.PD 1
.RS 0
This is indicated by section named [DEFAULT]. Below are the parameters that
are acceptable within this section.
.IP "\fBlog_address\fR"
Location where syslog sends the logs to. The default is /dev/log.
.IP "\fBlog_custom_handlers \fR"
Comma-separated list of functions to call to setup custom log handlers.
.IP "\fBlog_facility\fR"
Syslog log facility. The default is LOG_LOCAL0.
.IP "\fBlog_level\fR"
Log level used for logging. The default is INFO.
.IP "\fBlog_name\fR"
Label used when logging. The default is swift.
.IP "\fBlog_statsd_default_sample_rate\fR"
Defines the probability of sending a sample for any given event or
timing measurement. The default is 1.0.
.IP "\fBlog_statsd_host\fR"
If not set, the StatsD feature is disabled. The default is localhost.
.IP "\fBlog_statsd_metric_prefix\fR"
Value will be prepended to every metric sent to the StatsD server.
.IP "\fBlog_statsd_port\fR"
The port value for the StatsD server. The default is 8125.
.IP "\fBlog_statsd_sample_rate_factor\fR"
It is not recommended to set this to a value less than 1.0, if frequency of
logging is too high, tune the log_statsd_default_sample_rate instead.
The default value is 1.0.
.IP "\fBlog_udp_host\fR"
If not set, the UDP receiver for syslog is disabled.
.IP "\fBlog_udp_port\fR"
Port value for UDP receiver, if enabled. The default is 514.
.IP "\fBswift_dir\fR"
Swift configuration directory. The default is /etc/swift.
.IP "\fBuser\fR"
User to run as. The default is swift.
.RE
.PD
.SH CONTAINER RECONCILER SECTION
.PD 1
.RS 0
.IP "\fB[container-reconciler]\fR"
.RE
.RS 3
.IP "\fBinterval\fR"
Minimum time for a pass to take. The default is 30 seconds.
.IP "\fBreclaim_age\fR"
Time elapsed in seconds before an object can be reclaimed. The default is 604800 seconds.
.IP "\fBrequest_tries\fR"
Server errors from requests will be retried by default. The default is 3.
.RE
.PD
.SH PIPELINE SECTION
.PD 1
.RS 0
.IP "\fB[pipeline:main]\fR"
.RE
.RS 3
.IP "\fBpipeline\fR"
Pipeline to use for processing operations. The default is "catch_errors proxy-logging cache proxy-server".
.RE
.PD
.SH APP SECTION
.PD 1
.RS 0
\fBFor details of the available options see proxy-server.conf.5.\fR
.RS 0
.IP "\fB[app:proxy-server]\fR"
.RE
.RS 3
.IP "\fBuse\fR"
Entry point for paste.deploy in the server.
This is normally \fBegg:swift#proxy\fR.
.RE
.PD
.SH FILTER SECTIONS
.PD 1
.RS 0
Any section that has its name prefixed by "filter:" indicates a filter section.
Filters are used to specify configuration parameters for specific swift middlewares.
Below are the filters available and respective acceptable parameters.
\fBFor details of the available options for each filter section see proxy-server.conf.5.\fR
.RS 0
.IP "\fB[filter:cache]\fR"
.RE
Caching middleware that manages caching in swift.
.RS 3
.IP "\fBuse\fR"
Entry point for paste.deploy in the server.
This is normally \fBegg:swift#memcache\fR.
.RE
.PD
.RS 0
.IP "\fB[filter:catch_errors]\fR"
.RE
.RS 3
.IP "\fBuse\fR"
Entry point for paste.deploy in the server.
This is normally \fBegg:swift#catch_errors\fR.
.RE
.PD
.RS 0
.IP "\fB[filter:proxy-logging]\fR"
.RE
.RS 3
.IP "\fBuse\fR"
Entry point for paste.deploy in the server.
This is normally \fBegg:swift#proxy_logging\fR.
.RE
.PD
.SH DOCUMENTATION
.LP
More in depth documentation in regards to
.BI swift-container-reconciler
and also about OpenStack Swift as a whole can be found at
.BI https://docs.openstack.org/swift/latest/overview_policies.html.
.SH "SEE ALSO"
.BR swift-container-reconciler(1)

+ 6
- 5
doc/manpages/object-server.conf.5 View File

@ -225,11 +225,12 @@ should not specify any value for "replication_server".
Set to restrict the number of concurrent incoming SSYNC requests
Set to 0 for unlimited (the default is 4). Note that SSYNC requests are only used
by the object reconstructor or the object replicator when configured to use ssync.
.IP "\fBreplication_one_per_device\fR"
Restricts incoming SSYNC requests to one per device,
replication_concurrency above allowing. This can help control I/O to each
device, but you may wish to set this to False to allow multiple SSYNC
requests (up to the above replication_concurrency setting) per device. The default is true.
.IP "\fBreplication_concurrency_per_device\fR"
Set to restrict the number of concurrent incoming SSYNC requests per device;
set to 0 for unlimited requests per device. This can help control I/O to each
device. This does not override replication_concurrency described above, so you
may need to adjust both parameters depending on your hardware or network
capacity. Defaults to 1.
.IP "\fBreplication_lock_timeout\fR"
Number of seconds to wait for an existing replication device lock before
giving up. The default is 15.


+ 21
- 8
doc/source/api/large_objects.rst View File

@ -51,7 +51,7 @@ To create a static large object, divide your content into pieces and
create (upload) a segment object to contain each piece.
Create a manifest object. Include the ``multipart-manifest=put``
query string at the end of the manifest object name to indicate that
query parameter at the end of the manifest object name to indicate that
this is a manifest object.
The body of the **PUT** request on the manifest object comprises a json
@ -102,7 +102,7 @@ contrast to dynamic large objects.
}
]
|
|
The ``Content-Length`` request header must contain the length of the
json content—not the length of the segment objects. However, after the
@ -113,9 +113,22 @@ of the concatenated ``ETag`` values of the object segments. You can also
set the ``Content-Type`` request header and custom object metadata.
When the **PUT** operation sees the ``multipart-manifest=put`` query
string, it reads the request body and verifies that each segment
parameter, it reads the request body and verifies that each segment
object exists and that the sizes and ETags match. If there is a
mismatch, the **PUT**\ operation fails.
mismatch, the **PUT** operation fails.
This verification process can take a long time to complete, particularly
as the number of segments increases. You may include a ``heartbeat=on``
query parameter to have the server:
1. send a ``202 Accepted`` response before it begins validating segments,
2. periodically send whitespace characters to keep the connection alive, and
3. send a final response code in the body.
.. note::
The server may still immediately respond with ``400 Bad Request``
if it can determine that the request is invalid before making
backend requests.
If everything matches, the manifest object is created. The
``X-Static-Large-Object`` metadata is set to ``true`` indicating that
@ -124,18 +137,18 @@ this is a static object manifest.
Normally when you perform a **GET** operation on the manifest object,
the response body contains the concatenated content of the segment
objects. To download the manifest list, use the
``multipart-manifest=get`` query string. The resulting list is not
``multipart-manifest=get`` query parameter. The resulting list is not
formatted the same as the manifest you originally used in the **PUT**
operation.
If you use the **DELETE** operation on a manifest object, the manifest
object is deleted. The segment objects are not affected. However, if you
add the ``multipart-manifest=delete`` query string, the segment
add the ``multipart-manifest=delete`` query parameter, the segment
objects are deleted and if all are successfully deleted, the manifest
object is also deleted.
To change the manifest, use a **PUT** operation with the
``multipart-manifest=put`` query string. This request creates a
``multipart-manifest=put`` query parameter. This request creates a
manifest object. You can also update the object metadata in the usual
way.
@ -326,7 +339,7 @@ a manifest object but a normal object with content same as what you would
get on a **GET** request to the original manifest object.
To copy the manifest object, you include the ``multipart-manifest=get``
query string in the **COPY** request. The new object contains the same
query parameter in the **COPY** request. The new object contains the same
manifest as the original. The segment objects are not copied. Instead,
both the original and new manifest objects share the same set of segment
objects.


+ 113
- 113
doc/source/deployment_guide.rst View File

@ -563,119 +563,119 @@ ionice_priority None I/O scheduling priority of server
[object-server]
***************
============================= ====================== ===============================================
Option Default Description
----------------------------- ---------------------- -----------------------------------------------
use paste.deploy entry point for the
object server. For most cases,
this should be
`egg:swift#object`.
set log_name object-server Label used when logging
set log_facility LOG_LOCAL0 Syslog log facility
set log_level INFO Logging level
set log_requests True Whether or not to log each
request
set log_address /dev/log Logging directory
user swift User to run as
max_upload_time 86400 Maximum time allowed to upload an
object
slow 0 If > 0, Minimum time in seconds for a PUT or
DELETE request to complete. This is only
useful to simulate slow devices during testing
and development.
mb_per_sync 512 On PUT requests, sync file every
n MB
keep_cache_size 5242880 Largest object size to keep in
buffer cache
keep_cache_private false Allow non-public objects to stay
in kernel's buffer cache
allowed_headers Content-Disposition, Comma separated list of headers
Content-Encoding, that can be set in metadata on an object.
X-Delete-At, This list is in addition to
X-Object-Manifest, X-Object-Meta-* headers and cannot include
X-Static-Large-Object Content-Type, etag, Content-Length, or deleted
auto_create_account_prefix . Prefix used when automatically
creating accounts.
replication_server Configure parameter for creating
specific server. To handle all verbs,
including replication verbs, do not
specify "replication_server"
(this is the default). To only
handle replication, set to a True
value (e.g. "True" or "1").
To handle only non-replication
verbs, set to "False". Unless you
have a separate replication network, you
should not specify any value for
"replication_server".
replication_concurrency 4 Set to restrict the number of
concurrent incoming SSYNC
requests; set to 0 for unlimited
replication_one_per_device True Restricts incoming SSYNC
requests to one per device,
                                                     replication_concurrency above
allowing. This can help control
I/O to each device, but you may
wish to set this to False to
allow multiple SSYNC
requests (up to the above
replication_concurrency setting)
per device.
replication_lock_timeout 15 Number of seconds to wait for an
existing replication device lock
before giving up.
replication_failure_threshold 100 The number of subrequest failures
before the
replication_failure_ratio is
checked
replication_failure_ratio 1.0 If the value of failures /
successes of SSYNC
subrequests exceeds this ratio,
the overall SSYNC request
will be aborted
splice no Use splice() for zero-copy object
GETs. This requires Linux kernel
version 3.0 or greater. If you set
"splice = yes" but the kernel
does not support it, error messages
will appear in the object server
logs at startup, but your object
servers should continue to function.
nice_priority None Scheduling priority of server processes.
Niceness values range from -20 (most
favorable to the process) to 19 (least
favorable to the process). The default
does not modify priority.
ionice_class None I/O scheduling class of server processes.
I/O niceness class values are IOPRIO_CLASS_RT
(realtime), IOPRIO_CLASS_BE (best-effort),
and IOPRIO_CLASS_IDLE (idle).
The default does not modify class and
priority. Linux supports io scheduling
priorities and classes since 2.6.13 with
the CFQ io scheduler.
Work only with ionice_priority.
ionice_priority None I/O scheduling priority of server
processes. I/O niceness priority is
a number which goes from 0 to 7.
The higher the value, the lower the I/O
priority of the process. Work only with
ionice_class.
Ignored if IOPRIO_CLASS_IDLE is set.
eventlet_tpool_num_threads auto The number of threads in eventlet's thread pool.
Most IO will occur in the object server's main
thread, but certain "heavy" IO operations will
occur in separate IO threads, managed by
eventlet.
The default value is auto, whose actual value
is dependent on the servers_per_port value.
If servers_per_port is zero then it uses
eventlet's default (currently 20 threads).
If the servers_per_port is nonzero then it'll
only use 1 thread per process.
This value can be overridden with an integer
value.
============================= ====================== ===============================================
================================== ====================== ===============================================
Option Default Description
---------------------------------- ---------------------- -----------------------------------------------
use paste.deploy entry point for the
object server. For most cases,
this should be
`egg:swift#object`.
set log_name object-server Label used when logging
set log_facility LOG_LOCAL0 Syslog log facility
set log_level INFO Logging level
set log_requests True Whether or not to log each
request
set log_address /dev/log Logging directory
user swift User to run as
max_upload_time 86400 Maximum time allowed to upload an
object
slow 0 If > 0, Minimum time in seconds for a PUT or
DELETE request to complete. This is only
useful to simulate slow devices during testing
and development.
mb_per_sync 512 On PUT requests, sync file every
n MB
keep_cache_size 5242880 Largest object size to keep in
buffer cache
keep_cache_private false Allow non-public objects to stay
in kernel's buffer cache
allowed_headers Content-Disposition, Comma separated list of headers
Content-Encoding, that can be set in metadata on an object.
X-Delete-At, This list is in addition to
X-Object-Manifest, X-Object-Meta-* headers and cannot include
X-Static-Large-Object Content-Type, etag, Content-Length, or deleted
auto_create_account_prefix . Prefix used when automatically
creating accounts.
replication_server Configure parameter for creating
specific server. To handle all verbs,
including replication verbs, do not
specify "replication_server"
(this is the default). To only
handle replication, set to a True
value (e.g. "True" or "1").
To handle only non-replication
verbs, set to "False". Unless you
have a separate replication network, you
should not specify any value for
"replication_server".
replication_concurrency 4 Set to restrict the number of
concurrent incoming SSYNC
requests; set to 0 for unlimited
replication_concurrency_per_device 1 Set to restrict the number of
concurrent incoming SSYNC
requests per device; set to 0 for
                                                          unlimited requests per device.
This can help control I/O to each
device. This does not override
replication_concurrency described
above, so you may need to adjust
both parameters depending on your
hardware or network capacity.
replication_lock_timeout 15 Number of seconds to wait for an
existing replication device lock
before giving up.
replication_failure_threshold 100 The number of subrequest failures
before the
replication_failure_ratio is
checked
replication_failure_ratio 1.0 If the value of failures /
successes of SSYNC
subrequests exceeds this ratio,
the overall SSYNC request
will be aborted
splice no Use splice() for zero-copy object
GETs. This requires Linux kernel
version 3.0 or greater. If you set
"splice = yes" but the kernel
does not support it, error messages
will appear in the object server
logs at startup, but your object
servers should continue to function.
nice_priority None Scheduling priority of server processes.
Niceness values range from -20 (most
favorable to the process) to 19 (least
favorable to the process). The default
does not modify priority.
ionice_class None I/O scheduling class of server processes.
I/O niceness class values are IOPRIO_CLASS_RT
(realtime), IOPRIO_CLASS_BE (best-effort),
and IOPRIO_CLASS_IDLE (idle).
The default does not modify class and
priority. Linux supports io scheduling
priorities and classes since 2.6.13 with
the CFQ io scheduler.
Work only with ionice_priority.
ionice_priority None I/O scheduling priority of server
processes. I/O niceness priority is
a number which goes from 0 to 7.
The higher the value, the lower the I/O
priority of the process. Work only with
ionice_class.
Ignored if IOPRIO_CLASS_IDLE is set.
eventlet_tpool_num_threads auto The number of threads in eventlet's thread pool.
Most IO will occur in the object server's main
thread, but certain "heavy" IO operations will
occur in separate IO threads, managed by
eventlet.
The default value is auto, whose actual value
is dependent on the servers_per_port value.
If servers_per_port is zero then it uses
eventlet's default (currently 20 threads).
If the servers_per_port is nonzero then it'll
only use 1 thread per process.
This value can be overridden with an integer
value.
================================== ====================== ===============================================
*******************
[object-replicator]


+ 55
- 55
doc/source/overview_policies.rst View File

@ -219,13 +219,13 @@ used. :ref:`configure-policy` describes how to deprecate a policy.
Swift's behavior with deprecated policies is as follows:
* The deprecated policy will not appear in /info
* PUT/GET/DELETE/POST/HEAD are still allowed on the pre-existing containers
created with a deprecated policy
* Clients will get a ''400 Bad Request'' error when trying to create a new
container using the deprecated policy
* Clients still have access to policy statistics via HEAD on pre-existing
containers
* The deprecated policy will not appear in /info
* PUT/GET/DELETE/POST/HEAD are still allowed on the pre-existing containers
created with a deprecated policy
* Clients will get a ''400 Bad Request'' error when trying to create a new
container using the deprecated policy
* Clients still have access to policy statistics via HEAD on pre-existing
containers
.. note::
@ -272,10 +272,10 @@ section name must be of the form ``[storage-policy:<N>]`` where ``<N>`` is the
policy index. There's no reason other than readability that policy indexes be
sequential but the following rules are enforced:
* If a policy with index ``0`` is not declared and no other policies are
defined, Swift will create a default policy with index ``0``.
* The policy index must be a non-negative integer.
* Policy indexes must be unique.
* If a policy with index ``0`` is not declared and no other policies are
defined, Swift will create a default policy with index ``0``.
* The policy index must be a non-negative integer.
* Policy indexes must be unique.
.. warning::
@ -284,46 +284,46 @@ sequential but the following rules are enforced:
Each policy section contains the following options:
* ``name = <policy_name>`` (required)
- The primary name of the policy.
- Policy names are case insensitive.
- Policy names must contain only letters, digits or a dash.
- Policy names must be unique.
- Policy names can be changed.
- The name ``Policy-0`` can only be used for the policy with
index ``0``.
* ``alias = <policy_name>[, <policy_name>, ...]`` (optional)
- A comma-separated list of alternative names for the policy.
- The default value is an empty list (i.e. no aliases).
- All alias names must follow the rules for the ``name`` option.
- Aliases can be added to and removed from the list.
- Aliases can be useful to retain support for old primary names if the
primary name is changed.
* ``default = [true|false]`` (optional)
- If ``true`` then this policy will be used when the client does not
specify a policy.
- The default value is ``false``.
- The default policy can be changed at any time, by setting
``default = true`` in the desired policy section.
- If no policy is declared as the default and no other policies are
defined, the policy with index ``0`` is set as the default;
- Otherwise, exactly one policy must be declared default.
- Deprecated policies cannot be declared the default.
- See :ref:`default-policy` for more information.
* ``deprecated = [true|false]`` (optional)
- If ``true`` then new containers cannot be created using this policy.
- The default value is ``false``.
- Any policy may be deprecated by adding the ``deprecated`` option to
the desired policy section. However, a deprecated policy may not also
be declared the default. Therefore, since there must always be a
default policy, there must also always be at least one policy which
is not deprecated.
- See :ref:`deprecate-policy` for more information.
* ``policy_type = [replication|erasure_coding]`` (optional)
- The option ``policy_type`` is used to distinguish between different
policy types.
- The default value is ``replication``.
- When defining an EC policy use the value ``erasure_coding``.
* ``name = <policy_name>`` (required)
- The primary name of the policy.
- Policy names are case insensitive.
- Policy names must contain only letters, digits or a dash.
- Policy names must be unique.
- Policy names can be changed.
- The name ``Policy-0`` can only be used for the policy with
index ``0``.
* ``aliases = <policy_name>[, <policy_name>, ...]`` (optional)
- A comma-separated list of alternative names for the policy.
- The default value is an empty list (i.e. no aliases).
- All alias names must follow the rules for the ``name`` option.
- Aliases can be added to and removed from the list.
- Aliases can be useful to retain support for old primary names if the
primary name is changed.
* ``default = [true|false]`` (optional)
- If ``true`` then this policy will be used when the client does not
specify a policy.
- The default value is ``false``.
- The default policy can be changed at any time, by setting
``default = true`` in the desired policy section.
- If no policy is declared as the default and no other policies are
defined, the policy with index ``0`` is set as the default;
- Otherwise, exactly one policy must be declared default.
- Deprecated policies cannot be declared the default.
- See :ref:`default-policy` for more information.
* ``deprecated = [true|false]`` (optional)
- If ``true`` then new containers cannot be created using this policy.
- The default value is ``false``.
- Any policy may be deprecated by adding the ``deprecated`` option to
the desired policy section. However, a deprecated policy may not also
be declared the default. Therefore, since there must always be a
default policy, there must also always be at least one policy which
is not deprecated.
- See :ref:`deprecate-policy` for more information.
* ``policy_type = [replication|erasure_coding]`` (optional)
- The option ``policy_type`` is used to distinguish between different
policy types.
- The default value is ``replication``.
- When defining an EC policy use the value ``erasure_coding``.
The EC policy type has additional required options. See
:ref:`using_ec_policy` for details.
@ -648,10 +648,10 @@ that you wanted to take an existing cluster that already has lots of data on it
and upgrade to Swift with Storage Policies. From there you want to go ahead and
create a policy and test a few things out. All you need to do is:
#. Upgrade all of your Swift nodes to a policy-aware version of Swift
#. Define your policies in ``/etc/swift/swift.conf``
#. Create the corresponding object rings
#. Create containers and objects and confirm their placement is as expected
#. Upgrade all of your Swift nodes to a policy-aware version of Swift
#. Define your policies in ``/etc/swift/swift.conf``
#. Create the corresponding object rings
#. Create containers and objects and confirm their placement is as expected
For a specific example that takes you through these steps, please see
:doc:`policies_saio`


+ 6
- 5
etc/object-server.conf-sample View File

@ -163,11 +163,12 @@ use = egg:swift#object
# object replicator when configured to use ssync.
# replication_concurrency = 4
#
# Restricts incoming SSYNC requests to one per device,
# replication_concurrency above allowing. This can help control I/O to each
# device, but you may wish to set this to False to allow multiple SSYNC
# requests (up to the above replication_concurrency setting) per device.
# replication_one_per_device = True
# Set to restrict the number of concurrent incoming SSYNC requests per
# device; set to 0 for unlimited requests per device. This can help control
# I/O to each device. This does not override replication_concurrency described
# above, so you may need to adjust both parameters depending on your hardware
# or network capacity.
# replication_concurrency_per_device = 1
#
# Number of seconds to wait for an existing replication device lock before
# giving up.


+ 5
- 0
etc/proxy-server.conf-sample View File

@ -900,6 +900,11 @@ use = egg:swift#slo
# Default is to use the concurrency value from above; all of the same caveats
# apply regarding recommended ranges.
# delete_concurrency = 2
#
# In order to keep a connection active during a potentially long PUT request,
# clients may request that Swift send whitespace ahead of the final response
# body. This whitespace will be yielded at most every yield_frequency seconds.
# yield_frequency = 10
# Note: Put after auth and staticweb in the pipeline.
# If you don't put it in the pipeline, it will be inserted for you.


+ 4
- 4
swift/account/reaper.py View File

@ -28,13 +28,14 @@ import six
import swift.common.db
from swift.account.backend import AccountBroker, DATADIR
from swift.common.constraints import check_drive
from swift.common.direct_client import direct_delete_container, \
direct_delete_object, direct_get_container
from swift.common.exceptions import ClientException
from swift.common.ring import Ring
from swift.common.ring.utils import is_local_device
from swift.common.utils import get_logger, whataremyips, ismount, \
config_true_value, Timestamp
from swift.common.utils import get_logger, whataremyips, config_true_value, \
Timestamp
from swift.common.daemon import Daemon
from swift.common.storage_policy import POLICIES, PolicyError
@ -133,8 +134,7 @@ class AccountReaper(Daemon):
begin = time()
try:
for device in os.listdir(self.devices):
if self.mount_check and not ismount(
os.path.join(self.devices, device)):
if not check_drive(self.devices, device, self.mount_check):
self.logger.increment('errors')
self.logger.debug(
_('Skipping %s as it is not mounted'), device)


+ 5
- 4
swift/common/db_replicator.py View File

@ -28,10 +28,11 @@ from eventlet import GreenPool, sleep, Timeout
from eventlet.green import subprocess
import swift.common.db
from swift.common.constraints import check_drive
from swift.common.direct_client import quote
from swift.common.utils import get_logger, whataremyips, storage_directory, \
renamer, mkdirs, lock_parent_directory, config_true_value, \
unlink_older_than, dump_recon_cache, rsync_module_interpolation, ismount, \
unlink_older_than, dump_recon_cache, rsync_module_interpolation, \
json, Timestamp
from swift.common import ring
from swift.common.ring.utils import is_local_device
@ -636,8 +637,8 @@ class Replicator(Daemon):
node['replication_ip'],
node['replication_port']):
found_local = True
if self.mount_check and not ismount(
os.path.join(self.root, node['device'])):
if not check_drive(self.root, node['device'],
self.mount_check):
self._add_failure_stats(
[(failure_dev['replication_ip'],
failure_dev['device'])
@ -696,7 +697,7 @@ class ReplicatorRpc(object):
return HTTPBadRequest(body='Invalid object type')
op = args.pop(0)
drive, partition, hsh = replicate_args
if self.mount_check and not ismount(os.path.join(self.root, drive)):
if not check_drive(self.root, drive, self.mount_check):
return Response(status='507 %s is not mounted' % drive)
db_file = os.path.join(self.root, drive,
storage_directory(self.datadir, partition, hsh),


+ 32
- 24
swift/common/middleware/bulk.py View File

@ -218,40 +218,48 @@ ACCEPTABLE_FORMATS = ['text/plain', 'application/json', 'application/xml',
'text/xml']
def get_response_body(data_format, data_dict, error_list):
def get_response_body(data_format, data_dict, error_list, root_tag):
"""
Returns a properly formatted response body according to format. Handles
json and xml, otherwise will return text/plain. Note: xml response does not
include xml declaration.
Returns a properly formatted response body according to format.
Handles json and xml, otherwise will return text/plain.
Note: xml response does not include xml declaration.
:params data_format: resulting format
:params data_dict: generated data about results.
:params error_list: list of quoted filenames that failed
:params root_tag: the tag name to use for root elements when returning XML;
e.g. 'extract' or 'delete'
"""
if data_format == 'application/json':
data_dict['Errors'] = error_list
return json.dumps(data_dict)
if data_format and data_format.endswith('/xml'):
output = '<delete>\n'
output = ['<', root_tag, '>\n']
for key in sorted(data_dict):
xml_key = key.replace(' ', '_').lower()
output += '<%s>%s</%s>\n' % (xml_key, data_dict[key], xml_key)
output += '<errors>\n'
output += '\n'.join(
['<object>'
'<name>%s</name><status>%s</status>'
'</object>' % (saxutils.escape(name), status) for
name, status in error_list])
output += '</errors>\n</delete>\n'
return output
output = ''
output.extend([
'<', xml_key, '>',
saxutils.escape(str(data_dict[key])),
'</', xml_key, '>\n',
])
output.append('<errors>\n')
for name, status in error_list:
output.extend([
'<object><name>', saxutils.escape(name), '</name><status>',
saxutils.escape(status), '</status></object>\n',
])
output.extend(['</errors>\n</', root_tag, '>\n'])
return ''.join(output)
output = []
for key in sorted(data_dict):
output += '%s: %s\n' % (key, data_dict[key])
output += 'Errors:\n'
output += '\n'.join(
['%s, %s' % (name, status)
for name, status in error_list])
return output
output.append('%s: %s\n' % (key, data_dict[key]))
output.append('Errors:\n')
output.extend(
'%s, %s\n' % (name, status)
for name, status in error_list)
return ''.join(output)
def pax_key_to_swift_header(pax_key):
@ -485,7 +493,7 @@ class Bulk(object):
resp_dict['Response Status'] = HTTPServerError().status
yield separator + get_response_body(out_content_type,
resp_dict, failed_files)
resp_dict, failed_files, 'delete')
def handle_extract_iter(self, req, compress_type,
out_content_type='text/plain'):
@ -639,7 +647,7 @@ class Bulk(object):
resp_dict['Response Status'] = HTTPServerError().status
yield separator + get_response_body(
out_content_type, resp_dict, failed_files)
out_content_type, resp_dict, failed_files, 'extract')
def _process_delete(self, resp, pile, obj_name, resp_dict,
failed_files, failed_file_response, retry=0):


+ 154
- 56
swift/common/middleware/slo.py View File

@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
r"""
Middleware that will provide Static Large Object (SLO) support.
This feature is very similar to Dynamic Large Object (DLO) support in that
@ -72,6 +72,33 @@ found, size/etag mismatch, below minimum size, invalid range) then the user
will receive a 4xx error response. If everything does match, the user will
receive a 2xx response and the SLO object is ready for downloading.
Note that large manifests may take a long time to verify; historically,
clients would need to use a long read timeout for the connection to give
Swift enough time to send a final ``201 Created`` or ``400 Bad Request``
response. Now, clients should use the query parameters::
?multipart-manifest=put&heartbeat=on
to request that Swift send an immediate ``202 Accepted`` response and periodic
whitespace to keep the connection alive. A final response code will appear in
the body. The format of the response body defaults to text/plain but can be
either json or xml depending on the ``Accept`` header. An example body is as
follows::
Response Status: 201 Created
Response Body:
Etag: "8f481cede6d2ddc07cb36aa084d9a64d"
Last Modified: Wed, 25 Oct 2017 17:08:55 GMT
Errors:
Or, as a json response::
{"Response Status": "201 Created",
"Response Body": "",
"Etag": "\"8f481cede6d2ddc07cb36aa084d9a64d\"",
"Last Modified": "Wed, 25 Oct 2017 17:08:55 GMT",
"Errors": []}
Behind the scenes, on success, a JSON manifest generated from the user input is
sent to object servers with an extra ``X-Static-Large-Object: True`` header
and a modified ``Content-Type``. The items in this manifest will include the
@ -251,12 +278,14 @@ import json
import mimetypes
import re
import six
import time
from hashlib import md5
from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \
HTTPMethodNotAllowed, HTTPRequestEntityTooLarge, HTTPLengthRequired, \
HTTPOk, HTTPPreconditionFailed, HTTPException, HTTPNotFound, \
HTTPUnauthorized, HTTPConflict, HTTPUnprocessableEntity, Response, Range
HTTPUnauthorized, HTTPConflict, HTTPUnprocessableEntity, Response, Range, \
RESPONSE_REASONS
from swift.common.utils import get_logger, config_true_value, \
get_valid_utf8_str, override_bytes_from_content_type, split_path, \
register_swift_info, RateLimitedIterator, quote, close_if_possible, \
@ -273,6 +302,7 @@ from swift.common.middleware.bulk import get_response_body, \
DEFAULT_RATE_LIMIT_UNDER_SIZE = 1024 * 1024 # 1 MiB
DEFAULT_MAX_MANIFEST_SEGMENTS = 1000
DEFAULT_MAX_MANIFEST_SIZE = 1024 * 1024 * 2 # 2 MiB
DEFAULT_YIELD_FREQUENCY = 10
REQUIRED_SLO_KEYS = set(['path'])
@ -862,16 +892,26 @@ class StaticLargeObject(object):
:param app: The next WSGI filter or app in the paste.deploy chain.
:param conf: The configuration dict for the middleware.
:param max_manifest_segments: The maximum number of segments allowed in
newly-created static large objects.
:param max_manifest_size: The maximum size (in bytes) of newly-created
static-large-object manifests.
:param yield_frequency: If the client included ``heartbeat=on`` in the
query parameters when creating a new static large
object, the period of time to wait between sending
whitespace to keep the connection alive.
"""
def __init__(self, app, conf,
max_manifest_segments=DEFAULT_MAX_MANIFEST_SEGMENTS,
max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE):
max_manifest_size=DEFAULT_MAX_MANIFEST_SIZE,
yield_frequency=DEFAULT_YIELD_FREQUENCY):
self.conf = conf
self.app = app
self.logger = get_logger(conf, log_route='slo')
self.max_manifest_segments = max_manifest_segments
self.max_manifest_size = max_manifest_size
self.yield_frequency = yield_frequency
self.max_get_time = int(self.conf.get('max_get_time', 86400))
self.rate_limit_under_size = int(self.conf.get(
'rate_limit_under_size', DEFAULT_RATE_LIMIT_UNDER_SIZE))
@ -930,7 +970,6 @@ class StaticLargeObject(object):
raise HTTPRequestEntityTooLarge(
'Number of segments must be <= %d' %
self.max_manifest_segments)
total_size = 0
try:
out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
except ValueError:
@ -954,6 +993,7 @@ class StaticLargeObject(object):
return obj_name, sub_req.get_response(self)
def validate_seg_dict(seg_dict, head_seg_resp, allow_empty_segment):
obj_name = seg_dict['path']
if not head_seg_resp.is_success:
problem_segments.append([quote(obj_name),
head_seg_resp.status])
@ -1011,61 +1051,115 @@ class StaticLargeObject(object):
seg_data['sub_slo'] = True
return segment_length, seg_data
heartbeat = config_true_value(req.params.get('heartbeat'))
separator = ''
if heartbeat:
# Apparently some ways of deploying require that this to happens
# *before* the return? Not sure why.
req.environ['eventlet.minimum_write_chunk_size'] = 0
start_response('202 Accepted', [ # NB: not 201 !
('Content-Type', out_content_type),
])
separator = '\r\n\r\n'
data_for_storage = [None] * len(parsed_data)
with StreamingPile(self.concurrency) as pile:
for obj_name, resp in pile.asyncstarmap(do_head, (
(path, ) for path in path2indices)):
for i in path2indices[obj_name]:
segment_length, seg_data = validate_seg_dict(
parsed_data[i], resp,
allow_empty_segment=(i == len(parsed_data) - 1))
data_for_storage[i] = seg_data
total_size += segment_length
if problem_segments:
resp_body = get_response_body(
out_content_type, {}, problem_segments)
raise HTTPBadRequest(resp_body, content_type=out_content_type)
slo_etag = md5()
for seg_data in data_for_storage:
if seg_data.get('range'):
slo_etag.update('%s:%s;' % (seg_data['hash'],
seg_data['range']))
else:
slo_etag.update(seg_data['hash'])
slo_etag = slo_etag.hexdigest()
client_etag = req.headers.get('Etag')
if client_etag and client_etag.strip('"') != slo_etag:
raise HTTPUnprocessableEntity(request=req)
def resp_iter():
total_size = 0
# wsgi won't propagate start_response calls until some data has
# been yielded so make sure first heartbeat is sent immediately
if heartbeat:
yield ' '
last_yield_time = time.time()
with StreamingPile(self.concurrency) as pile:
for obj_name, resp in pile.asyncstarmap(do_head, (
(path, ) for path in path2indices)):
now = time.time()
if heartbeat and (now - last_yield_time >
self.yield_frequency):
# Make sure we've called start_response before
# sending data
yield ' '
last_yield_time = now
for i in path2indices[obj_name]:
segment_length, seg_data = validate_seg_dict(
parsed_data[i], resp,
allow_empty_segment=(i == len(parsed_data) - 1))
data_for_storage[i] = seg_data
total_size += segment_length
if problem_segments:
err = HTTPBadRequest(content_type=out_content_type)
resp_dict = {}
if heartbeat:
resp_dict['Response Status'] = err.status
resp_dict['Response Body'] = err.body or '\n'.join(
RESPONSE_REASONS.get(err.status_int, ['']))
else:
start_response(err.status,
[(h, v) for h, v in err.headers.items()
if h.lower() != 'content-length'])
yield separator + get_response_body(
out_content_type, resp_dict, problem_segments, 'upload')
return
slo_etag = md5()
for seg_data in data_for_storage:
if seg_data.get('range'):
slo_etag.update('%s:%s;' % (seg_data['hash'],
seg_data['range']))
else:
slo_etag.update(seg_data['hash'])
slo_etag = slo_etag.hexdigest()
client_etag = req.headers.get('Etag')
if client_etag and client_etag.strip('"') != slo_etag:
err = HTTPUnprocessableEntity(request=req)
if heartbeat:
yield separator + get_response_body(out_content_type, {
'Response Status': err.status,
'Response Body': err.body or '\n'.join(
RESPONSE_REASONS.get(err.status_int, [''])),
}, problem_segments, 'upload')
else:
for chunk in err(req.environ, start_response):
yield chunk
return
json_data = json.dumps(data_for_storage)
if six.PY3:
json_data = json_data.encode('utf-8')
req.body = json_data
req.headers.update({
SYSMETA_SLO_ETAG: slo_etag,
SYSMETA_SLO_SIZE: total_size,
'X-Static-Large-Object': 'True',
'Etag': md5(json_data).hexdigest(),
})
env = req.environ
if not env.get('CONTENT_TYPE'):
guessed_type, _junk = mimetypes.guess_type(req.path_info)
env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
env['swift.content_type_overridden'] = True
env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
def start_response_wrapper(status, headers, exc_info=None):
for i, (header, _value) in enumerate(headers):
if header.lower() == 'etag':
headers[i] = ('Etag', '"%s"' % slo_etag)
break
return start_response(status, headers, exc_info)
json_data = json.dumps(data_for_storage)
if six.PY3:
json_data = json_data.encode('utf-8')
req.body = json_data
req.headers.update({
SYSMETA_SLO_ETAG: slo_etag,
SYSMETA_SLO_SIZE: total_size,
'X-Static-Large-Object': 'True',
'Etag': md5(json_data).hexdigest(),
})
env = req.environ
if not env.get('CONTENT_TYPE'):
guessed_type, _junk = mimetypes.guess_type(req.path_info)
env['CONTENT_TYPE'] = (guessed_type or
'application/octet-stream')
env['swift.content_type_overridden'] = True
env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
resp = req.get_response(self.app)
resp_dict = {'Response Status': resp.status}
if resp.is_success:
resp.etag = slo_etag
resp_dict['Etag'] = resp.headers['Etag']
resp_dict['Last Modified'] = resp.headers['Last-Modified']
if heartbeat:
resp_dict['Response Body'] = resp.body
yield separator + get_response_body(
out_content_type, resp_dict, [], 'upload')
else:
for chunk in resp(req.environ, start_response):
yield chunk
return self.app(env, start_response_wrapper)
return resp_iter()
def get_segments_to_delete_iter(self, req):
"""
@ -1214,10 +1308,13 @@ def filter_factory(global_conf, **local_conf):
DEFAULT_MAX_MANIFEST_SEGMENTS))
max_manifest_size = int(conf.get('max_manifest_size',
DEFAULT_MAX_MANIFEST_SIZE))
yield_frequency = int(conf.get('yield_frequency',
DEFAULT_YIELD_FREQUENCY))
register_swift_info('slo',
max_manifest_segments=max_manifest_segments,
max_manifest_size=max_manifest_size,
yield_frequency=yield_frequency,
# this used to be configurable; report it as 1 for
# clients that might still care
min_segment_size=1)
@ -1226,5 +1323,6 @@ def filter_factory(global_conf, **local_conf):
return StaticLargeObject(
app, conf,
max_manifest_segments=max_manifest_segments,
max_manifest_size=max_manifest_size)
max_manifest_size=max_manifest_size,
yield_frequency=yield_frequency)
return slo_filter

+ 4
- 4
swift/common/ring/builder.py View File

@ -130,7 +130,7 @@ class RingBuilder(object):
# within a given number of hours (24 is my usual test). Removing
# a device overrides this behavior as it's assumed that's only
# done because of device failure.
self._last_part_moves = None
self._last_part_moves = array('B', itertools.repeat(0, self.parts))
# _part_moved_bitmap record parts have been moved
self._part_moved_bitmap = None
# _last_part_moves_epoch indicates the time the offsets in
@ -167,7 +167,7 @@ class RingBuilder(object):
@property
def ever_rebalanced(self):
return self._last_part_moves is not None
return self._replica2part2dev is not None
def _set_part_moved(self, part):
self._last_part_moves[part] = 0
@ -507,7 +507,7 @@ class RingBuilder(object):
if not self.ever_rebalanced:
self.logger.debug("New builder; performing initial balance")
self._last_part_moves = array('B', itertools.repeat(0, self.parts))
self._update_last_part_moves()
with _set_random_seed(seed):
@ -925,7 +925,7 @@ class RingBuilder(object):
"""
self._part_moved_bitmap = bytearray(max(2 ** (self.part_power - 3), 1))
elapsed_hours = int(time() - self._last_part_moves_epoch) / 3600
if elapsed_hours <= 0 or not self._last_part_moves:
if elapsed_hours <= 0:
return
for part in range(self.parts):
# The "min(self._last_part_moves[part] + elapsed_hours, 0xff)"


+ 10
- 6
swift/common/ring/composite_builder.py View File

@ -639,6 +639,7 @@ class CompositeRingBuilder(object):
component builder.
"""
self._load_components()
self.update_last_part_moves()
component_builders = zip(self._builder_files, self._builders)
# don't let the same builder go first each time
shuffle(component_builders)
@ -678,10 +679,10 @@ class CompositeRingBuilder(object):
Updates the record of how many hours ago each partition was moved in
all component builders.
"""
# Called by component builders. We need all component builders to be at
# same last_part_moves epoch before any builder starts moving parts;
# this will effectively be a no-op for builders that have already been
# updated in last hour
# Called at start of each composite rebalance. We need all component
# builders to be at same last_part_moves epoch before any builder
# starts moving parts; this will effectively be a no-op for builders
# that have already been updated in last hour
for b in self._builders:
b.update_last_part_moves()
@ -723,8 +724,11 @@ class CooperativeRingBuilder(RingBuilder):
super(CooperativeRingBuilder, self)._can_part_move(part))
def _update_last_part_moves(self):
# overrides superclass method to delegate to parent builder
return self.parent_builder.update_last_part_moves()
# overrides superclass method - parent builder should have called
# update_last_part_moves() before rebalance; calling the superclass
# method here would reset _part_moved_bitmap which is state we rely on
# when min_part_hours is zero
pass
def update_last_part_moves(self):
"""


+ 2
- 6
swift/common/storage_policy.py View File

@ -21,7 +21,7 @@ import six
from six.moves.configparser import ConfigParser
from swift.common.utils import (
config_true_value, quorum_size, whataremyips, list_from_csv,
config_positive_int_value)
config_positive_int_value, get_zero_indexed_base_string)
from swift.common.ring import Ring, RingData
from swift.common import utils
from swift.common.exceptions import RingLoadError
@ -92,11 +92,7 @@ class PolicyError(ValueError):
def _get_policy_string(base, policy_index):
if policy_index == 0 or policy_index is None:
return_string = base
else:
return_string = base + "-%d" % int(policy_index)
return return_string
return get_zero_indexed_base_string(base, policy_index)
def get_policy_string(base, policy_or_index):


+ 46
- 8
swift/common/utils.py View File

@ -2280,8 +2280,45 @@ def hash_path(account, container=None, object=None, raw_digest=False):
+ HASH_PATH_SUFFIX).hexdigest()
def get_zero_indexed_base_string(base, index):
"""
This allows the caller to make a list of things with indexes, where the
first item (zero indexed) is just the bare base string, and subsequent
indexes are appended '-1', '-2', etc.
e.g.::
'lock', None => 'lock'
'lock', 0 => 'lock'
'lock', 1 => 'lock-1'
'object', 2 => 'object-2'
:param base: a string, the base string; when ``index`` is 0 (or None) this
is the identity function.
:param index: a digit, typically an integer (or None); for values other
than 0 or None this digit is appended to the base string
separated by a hyphen.
"""
if index == 0 or index is None:
return_string = base
else:
return_string = base + "-%d" % int(index)
return return_string
def _get_any_lock(fds):
for fd in fds:
try:
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
return True
except IOError as err:
if err.errno != errno.EAGAIN:
raise
return False
@contextmanager
def lock_path(directory, timeout=10, timeout_class=None):
def lock_path(directory, timeout=10, timeout_class=None, limit=1):
"""
Context manager that acquires a lock on a directory. This will block until
the lock can be acquired, or the timeout time has expired (whichever occurs
@ -2297,12 +2334,16 @@ def lock_path(directory, timeout=10, timeout_class=None):
lock cannot be granted within the timeout. Will be
constructed as timeout_class(timeout, lockpath). Default:
LockTimeout
:param limit: the maximum number of locks that may be held concurrently on
the same directory; defaults to 1
"""
if timeout_class is None:
timeout_class = swift.common.exceptions.LockTimeout
mkdirs(directory)
lockpath = '%s/.lock' % directory
fd = os.open(lockpath, os.O_WRONLY | os.O_CREAT)
fds = [os.open(get_zero_indexed_base_string(lockpath, i),
os.O_WRONLY | os.O_CREAT)
for i in range(limit)]
sleep_time = 0.01
slower_sleep_time = max(timeout * 0.01, sleep_time)
slowdown_at = timeout * 0.01
@ -2310,19 +2351,16 @@ def lock_path(directory, timeout=10, timeout_class=None):
try:
with timeout_class(timeout, lockpath):
while True:
try:
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
if _get_any_lock(fds):
break
except IOError as err:
if err.errno != errno.EAGAIN:
raise
if time_slept > slowdown_at:
sleep_time = slower_sleep_time
sleep(sleep_time)
time_slept += sleep_time
yield True
finally:
os.close(fd)
for fd in fds:
os.close(fd)
@contextmanager


+ 6
- 5
swift/container/updater.py View File

@ -26,11 +26,12 @@ from tempfile import mkstemp
from eventlet import spawn, Timeout
import swift.common.db
from swift.common.constraints import check_drive
from swift.container.backend import ContainerBroker, DATADIR
from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring
from swift.common.utils import get_logger, config_true_value, ismount, \
from swift.common.utils import get_logger, config_true_value, \
dump_recon_cache, majority_size, Timestamp, ratelimit_sleep, \
eventlet_monkey_patch
from swift.common.daemon import Daemon
@ -40,9 +41,9 @@ from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR
class ContainerUpdater(Daemon):
"""Update container information in account listings."""
def __init__(self, conf):
def __init__(self, conf, logger=None):
self.conf = conf
self.logger = get_logger(conf, log_route='container-updater')
self.logger = logger or get_logger(conf, log_route='container-updater')
self.devices = conf.get('devices', '/srv/node')
self.mount_check = config_true_value(conf.get('mount_check', 'true'))
self.swift_dir = conf.get('swift_dir', '/etc/swift')
@ -100,8 +101,8 @@ class ContainerUpdater(Daemon):
"""
paths = []
for device in self._listdir(self.devices):
dev_path = os.path.join(self.devices, device)
if self.mount_check and not ismount(dev_path):
dev_path = check_drive(self.devices, device, self.mount_check)
if not dev_path:
self.logger.warning(_('%s is not mounted'), device)
continue
con_path = os.path.join(dev_path, DATADIR)


+ 2
- 2
swift/obj/auditor.py View File

@ -289,9 +289,9 @@ class AuditorWorker(object):
class ObjectAuditor(Daemon):
"""Audit objects."""
def __init__(self, conf, **options):
def __init__(self, conf, logger=None, **options):
self.conf = conf
self.logger = get_logger(conf, log_route='object-auditor')
self.logger = logger or get_logger(conf, log_route='object-auditor')
self.devices = conf.get('devices', '/srv/node')
self.concurrency = int(conf.get('concurrency', 1))
self.conf_zero_byte_fps = int(


+ 38
- 16
swift/obj/diskfile.py View File

@ -62,7 +62,7 @@ from swift.common.request_helpers import is_sys_meta
from swift.common.utils import mkdirs, Timestamp, \
storage_directory, hash_path, renamer, fallocate, fsync, fdatasync, \
fsync_dir, drop_buffer_cache, lock_path, write_pickle, \
config_true_value, listdir, split_path, ismount, remove_file, \
config_true_value, listdir, split_path, remove_file, \
get_md5_socket, F_SETPIPE_SZ, decode_timestamps, encode_timestamps, \
tpool_reraise, MD5_OF_EMPTY_STRING, link_fd_to_path, o_tmpfile_supported, \
O_TMPFILE, makedirs_count, replace_partition_in_path
@ -429,11 +429,11 @@ def object_audit_location_generator(devices, mount_check=True, logger=None,
shuffle(device_dirs)
for device in device_dirs:
if mount_check and not \
ismount(os.path.join(devices, device)):
if not check_drive(devices, device, mount_check):
if logger:
logger.debug(
_('Skipping %s as it is not mounted'), device)
'Skipping %s as it is not %s', device,
'mounted' if mount_check else 'a dir')
continue
# loop through object dirs for all policies
device_dir = os.path.join(devices, device)
@ -624,8 +624,28 @@ class BaseDiskFileManager(object):
self.bytes_per_sync = int(conf.get('mb_per_sync', 512)) * 1024 * 1024
self.mount_check = config_true_value(conf.get('mount_check', 'true'))
self.reclaim_age = int(conf.get('reclaim_age', DEFAULT_RECLAIM_AGE))
self.replication_one_per_device = config_true_value(
conf.get('replication_one_per_device', 'true'))
replication_concurrency_per_device = conf.get(
'replication_concurrency_per_device')
replication_one_per_device = conf.get('replication_one_per_device')
if replication_concurrency_per_device is None \
and replication_one_per_device is not None:
self.logger.warning('Option replication_one_per_device is '
'deprecated and will be removed in a future '
'version. Update your configuration to use '
'option replication_concurrency_per_device.')
if config_true_value(replication_one_per_device):
replication_concurrency_per_device = 1
else:
replication_concurrency_per_device = 0
elif replication_one_per_device is not None:
self.logger.warning('Option replication_one_per_device ignored as '
'replication_concurrency_per_device is '
'defined.')
if replication_concurrency_per_device is None:
self.replication_concurrency_per_device = 1
else:
self.replication_concurrency_per_device = int(
replication_concurrency_per_device)
self.replication_lock_timeout = int(conf.get(
'replication_lock_timeout', 15))
@ -1189,14 +1209,15 @@ class BaseDiskFileManager(object):
:returns: full path to the device, None if the path to the device is
not a proper mount point or directory.
"""
# we'll do some kind of check unless explicitly forbidden
if mount_check is not False:
if mount_check or self.mount_check:
mount_check = True
else:
mount_check = False
return check_drive(self.devices, device, mount_check)
return join(self.devices, device)
if mount_check is False:
# explicitly forbidden from syscall, just return path
return join(self.devices, device)
# we'll do some kind of check if not explicitly forbidden
if mount_check or self.mount_check:
mount_check = True
else:
mount_check = False
return check_drive(self.devices, device, mount_check)
@contextmanager
def replication_lock(self, device):
@ -1208,12 +1229,13 @@ class BaseDiskFileManager(object):
:raises ReplicationLockTimeout: If the lock on the device
cannot be granted within the configured timeout.
"""
if self.replication_one_per_device:
if self.replication_concurrency_per_device:
dev_path = self.get_dev_path(device)
with lock_path(
dev_path,
timeout=self.replication_lock_timeout,
timeout_class=ReplicationLockTimeout):
timeout_class=ReplicationLockTimeout,
limit=self.replication_concurrency_per_device):
yield True
else:
yield True


+ 10
- 7
swift/obj/replicator.py View File