py3: fix non-ascii metadata handling in account-server

Previously, we were storing the WSGI-style UTF-8-bytes-decoded-as-Latin-1
strings in the JSON field, and sending them back to eventlet directly.
If running in a mixed py2/py3 cluster, replication would eventually get
that back to the py2 server, and worse, the native-string version would
get back to the py3 server! Then on GET or HEAD, eventlet would barf
if any characters were outside the Latin-1 range.

Closes-Bug: #1837805
Change-Id: I31d942e72fd7bfbb1db4dbb1dd522dff69969e5d
This commit is contained in:
Tim Burke 2019-07-24 16:59:08 -07:00
parent d6e790d1b7
commit 71f83963c2
4 changed files with 66 additions and 36 deletions

View File

@ -21,8 +21,6 @@ from swift import gettext_ as _
from eventlet import Timeout
import six
import swift.common.db
from swift.account.backend import AccountBroker, DATADIR
from swift.account.utils import account_listing_response, get_response_headers
@ -110,6 +108,14 @@ class AccountController(BaseStorageServer):
broker.delete_db(req_timestamp.internal)
return self._deleted_response(broker, req, HTTPNoContent)
def _update_metadata(self, req, broker, req_timestamp):
metadata = {
wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal)
for key, value in req.headers.items()
if is_sys_or_user_meta('account', key)}
if metadata:
broker.update_metadata(metadata, validate_metadata=True)
@public
@timing_stats()
def PUT(self, req):
@ -169,24 +175,7 @@ class AccountController(BaseStorageServer):
broker.update_put_timestamp(timestamp.internal)
if broker.is_deleted():
return HTTPConflict(request=req)
metadata = {}
if six.PY2:
metadata.update((key, (value, timestamp.internal))
for key, value in req.headers.items()
if is_sys_or_user_meta('account', key))
else:
for key, value in req.headers.items():
if is_sys_or_user_meta('account', key):
# Cast to native strings, so that json inside
# updata_metadata eats the data.
try:
value = value.encode('latin-1').decode('utf-8')
except UnicodeDecodeError:
raise HTTPBadRequest(
'Metadata must be valid UTF-8')
metadata[key] = (value, timestamp.internal)
if metadata:
broker.update_metadata(metadata, validate_metadata=True)
self._update_metadata(req, broker, timestamp)
if created:
return HTTPCreated(request=req)
else:
@ -287,12 +276,7 @@ class AccountController(BaseStorageServer):
broker = self._get_account_broker(drive, part, account)
if broker.is_deleted():
return self._deleted_response(broker, req, HTTPNotFound)
metadata = {}
metadata.update((key, (value, req_timestamp.internal))
for key, value in req.headers.items()
if is_sys_or_user_meta('account', key))
if metadata:
broker.update_metadata(metadata, validate_metadata=True)
self._update_metadata(req, broker, req_timestamp)
return HTTPNoContent(request=req)
def __call__(self, env, start_response):

View File

@ -18,7 +18,7 @@ import json
import six
from swift.common.middleware import listing_formats
from swift.common.swob import HTTPOk, HTTPNoContent
from swift.common.swob import HTTPOk, HTTPNoContent, str_to_wsgi
from swift.common.utils import Timestamp
from swift.common.storage_policy import POLICIES
@ -64,8 +64,8 @@ def get_response_headers(broker):
for key, value in stats.items():
header_name = header_prefix % key.replace('_', '-')
resp_headers[header_name] = value
resp_headers.update((key, value)
for key, (value, timestamp) in
resp_headers.update((str_to_wsgi(key), str_to_wsgi(value))
for key, (value, _timestamp) in
broker.metadata.items() if value != '')
return resp_headers

View File

@ -415,12 +415,11 @@ class ContainerController(BaseStorageServer):
return created
def _update_metadata(self, req, broker, req_timestamp, method):
metadata = {}
metadata.update(
(wsgi_to_str(key), (wsgi_to_str(value), req_timestamp.internal))
metadata = {
wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal)
for key, value in req.headers.items()
if key.lower() in self.save_headers or
is_sys_or_user_meta('container', key))
if key.lower() in self.save_headers
or is_sys_or_user_meta('container', key)}
if metadata:
if 'X-Container-Sync-To' in metadata:
if 'X-Container-Sync-To' not in broker.metadata or \
@ -706,7 +705,7 @@ class ContainerController(BaseStorageServer):
def create_listing(self, req, out_content_type, info, resp_headers,
metadata, container_list, container):
for key, (value, timestamp) in metadata.items():
for key, (value, _timestamp) in metadata.items():
if value and (key.lower() in self.save_headers or
is_sys_or_user_meta('container', key)):
resp_headers[str_to_wsgi(key)] = str_to_wsgi(value)

View File

@ -32,6 +32,7 @@ import xml.dom.minidom
from swift import __version__ as swift_version
from swift.common.swob import (Request, WsgiBytesIO, HTTPNoContent)
from swift.common import constraints
from swift.account.backend import AccountBroker
from swift.account.server import AccountController
from swift.common.utils import (normalize_timestamp, replication, public,
mkdirs, storage_directory, Timestamp)
@ -49,7 +50,8 @@ class TestAccountController(unittest.TestCase):
self.testdir = os.path.join(self.testdir_base, 'account_server')
mkdirs(os.path.join(self.testdir, 'sda1'))
self.controller = AccountController(
{'devices': self.testdir, 'mount_check': 'false'})
{'devices': self.testdir, 'mount_check': 'false'},
logger=debug_logger())
def tearDown(self):
"""Tear down for testing swift.account.server.AccountController"""
@ -522,6 +524,51 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 202)
def test_utf8_metadata(self):
ts_str = normalize_timestamp(1)
def get_test_meta(method, headers):
# Set metadata header
headers.setdefault('X-Timestamp', ts_str)
req = Request.blank(
'/sda1/p/a', environ={'REQUEST_METHOD': method},
headers=headers)
resp = req.get_response(self.controller)
self.assertIn(resp.status_int, (201, 202, 204))
db_path = os.path.join(*next(
(dir_name, file_name)
for dir_name, _, files in os.walk(self.testdir)
for file_name in files if file_name.endswith('.db')
))
broker = AccountBroker(db_path)
# Why not use broker.metadata, you ask? Because we want to get
# as close to the on-disk format as is reasonable.
result = json.loads(broker.get_raw_metadata())
# Clear it out for the next run
with broker.get() as conn:
conn.execute("UPDATE account_stat SET metadata=''")
conn.commit()
return result
wsgi_str = '\xf0\x9f\x91\x8d'
uni_str = u'\U0001f44d'
self.assertEqual(
get_test_meta('PUT', {'x-account-sysmeta-' + wsgi_str: wsgi_str}),
{u'X-Account-Sysmeta-' + uni_str: [uni_str, ts_str]})
self.assertEqual(
get_test_meta('PUT', {'x-account-meta-' + wsgi_str: wsgi_str}),
{u'X-Account-Meta-' + uni_str: [uni_str, ts_str]})
self.assertEqual(
get_test_meta('POST', {'x-account-sysmeta-' + wsgi_str: wsgi_str}),
{u'X-Account-Sysmeta-' + uni_str: [uni_str, ts_str]})
self.assertEqual(
get_test_meta('POST', {'x-account-meta-' + wsgi_str: wsgi_str}),
{u'X-Account-Meta-' + uni_str: [uni_str, ts_str]})
def test_PUT_GET_metadata(self):
# Set metadata header
req = Request.blank(