[MongoDB] Fix bug with 'bad' chars in metadatas keys

MongoDB has two restrictions on fields names, they cannot contain '.' and
start with '$'. In resource_metadata field we can get any data, so it could
be dict with any keys, and if some key contain i.e. dots sample with this
metadata wouldn't be stored in MongoDB.

To protect samples from missing this patch propose to check metadata
for occurrence of dots and '$' in keys if metadata is dict, and to transform
dict with dots before storing it in MongoDB. Transformation means that
we make dict more nested and new node appears where there is dot in dict
key.

It means that if we get metadata as a dict {'a.b.c': v} before storing in
MongoDB it would be transform in {'a': {'b': {'c': v}}}.

Closes-Bug: 1246264
Change-Id: I279ae67cd24d4dadd7d7e58007e7c5c297d91772
This commit is contained in:
Igor Degtiarov 2014-12-01 15:51:25 +02:00
parent 85acec02b2
commit f379b96dd2
7 changed files with 170 additions and 23 deletions

View File

@ -208,6 +208,9 @@ class Connection(pymongo_base.Connection):
ceilometer.meter.meter_message_from_counter
"""
# Record the updated resource metadata
data = copy.deepcopy(data)
data['resource_metadata'] = pymongo_utils.improve_keys(
data.pop('resource_metadata'))
self.db.resource.update(
{'_id': data['resource_id']},
{'$set': {'project_id': data['project_id'],
@ -255,7 +258,7 @@ class Connection(pymongo_base.Connection):
if pagination:
raise ceilometer.NotImplementedError('Pagination not implemented')
metaquery = metaquery or {}
metaquery = pymongo_utils.improve_keys(metaquery, metaquery=True) or {}
q = {}
if user is not None:
@ -302,7 +305,8 @@ class Connection(pymongo_base.Connection):
last_sample_timestamp=last_ts,
source=latest_meter['source'],
user_id=latest_meter['user_id'],
metadata=latest_meter['resource_metadata'])
metadata=pymongo_utils.unquote_keys(
latest_meter['resource_metadata']))
def get_meter_statistics(self, sample_filter, period=None, groupby=None,
aggregate=None):

View File

@ -483,6 +483,9 @@ class Connection(pymongo_base.Connection):
# unconditionally insert sample timestamps and resource metadata
# (in the update case, this must be conditional on the sample not
# being out-of-order)
data = copy.deepcopy(data)
data['resource_metadata'] = pymongo_utils.improve_keys(
data.pop('resource_metadata'))
resource = self.db.resource.find_and_modify(
{'_id': data['resource_id']},
{'$set': {'project_id': data['project_id'],
@ -729,7 +732,7 @@ class Connection(pymongo_base.Connection):
first_sample_timestamp=resource['first_timestamp'],
last_sample_timestamp=resource['last_timestamp'],
source=resource['source'],
metadata=resource['metadata'])
metadata=pymongo_utils.unquote_keys(resource['metadata']))
finally:
self.db[out].drop()
@ -762,7 +765,7 @@ class Connection(pymongo_base.Connection):
last_sample_timestamp=r.get('last_sample_timestamp',
self._APOCALYPSE),
source=r['source'],
metadata=r['metadata'])
metadata=pymongo_utils.unquote_keys(r['metadata']))
def get_resources(self, user=None, project=None, source=None,
start_timestamp=None, start_timestamp_op=None,
@ -784,7 +787,7 @@ class Connection(pymongo_base.Connection):
if pagination:
raise ceilometer.NotImplementedError('Pagination not implemented')
metaquery = metaquery or {}
metaquery = pymongo_utils.improve_keys(metaquery, metaquery=True) or {}
query = {}
if user is not None:

View File

@ -25,6 +25,7 @@ from oslo.config import cfg
from oslo.utils import netutils
import pymongo
import six
from six.moves.urllib import parse
from ceilometer.openstack.common.gettextutils import _
from ceilometer.openstack.common import log
@ -146,11 +147,82 @@ def make_query_from_filter(sample_filter, require_meter=True):
# so the samples call metadata resource_metadata, so we convert
# to that.
q.update(dict(('resource_%s' % k, v)
for (k, v) in six.iteritems(sample_filter.metaquery)))
q.update(dict(
('resource_%s' % k, v) for (k, v) in six.iteritems(
improve_keys(sample_filter.metaquery, metaquery=True))))
return q
def quote_key(key, reverse=False):
"""Prepare key for storage data in MongoDB.
:param key: key that should be quoted
:param reverse: boolean, True --- if we need a reverse order of the keys
parts
:return: iter of quoted part of the key
"""
r = -1 if reverse else 1
for k in key.split('.')[::r]:
if k.startswith('$'):
k = parse.quote(k)
yield k
def improve_keys(data, metaquery=False):
"""Improves keys in dict if they contained '.' or started with '$'.
:param data: is a dictionary where keys need to be checked and improved
:param metaquery: boolean, if True dots are not escaped from the keys
:return: improved dictionary if keys contained dots or started with '$':
{'a.b': 'v'} -> {'a': {'b': 'v'}}
{'$ab': 'v'} -> {'%24ab': 'v'}
"""
if not isinstance(data, dict):
return data
if metaquery:
for key in data.iterkeys():
if '.$' in key:
key_list = []
for k in quote_key(key):
key_list.append(k)
new_key = '.'.join(key_list)
data[new_key] = data.pop(key)
else:
for key, value in data.items():
if isinstance(value, dict):
improve_keys(value)
if '.' in key:
new_dict = {}
for k in quote_key(key, reverse=True):
new = {}
new[k] = new_dict if new_dict else data.pop(key)
new_dict = new
data.update(new_dict)
else:
if key.startswith('$'):
new_key = parse.quote(key)
data[new_key] = data.pop(key)
return data
def unquote_keys(data):
"""Restores initial view of 'quoted' keys in dictionary data
:param data: is a dictionary
:return: data with restored keys if they were 'quoted'.
"""
if isinstance(data, dict):
for key, value in data.items():
if isinstance(value, dict):
unquote_keys(value)
if key.startswith('%24'):
k = parse.unquote(key)
data[k] = data.pop(key)
return data
class ConnectionPool(object):
def __init__(self):

View File

@ -65,7 +65,7 @@ class Connection(base.Connection):
if pagination:
raise ceilometer.NotImplementedError('Pagination not implemented')
metaquery = metaquery or {}
metaquery = pymongo_utils.improve_keys(metaquery, metaquery=True) or {}
q = {}
if user is not None:
@ -138,4 +138,9 @@ class Connection(base.Connection):
s['counter_unit'] = s.get('counter_unit', '')
# Tolerate absence of recorded_at in older datapoints
s['recorded_at'] = s.get('recorded_at')
# Check samples for metadata and "unquote" key if initially it
# was started with '$'.
if s.get('resource_metadata'):
s['resource_metadata'] = pymongo_utils.unquote_keys(
s.get('resource_metadata'))
yield models.Sample(**s)

View File

@ -159,7 +159,9 @@ class TestListMeters(v2.FunctionalTest,
'properties': {
'prop_1': 'prop_value',
'prop_2': {'sub_prop_1':
'sub_prop_value'}
'sub_prop_value'},
'prop.3': {'$sub_prop.2':
'sub_prop_value2'}
},
'size': 0,
'util': 0.58,
@ -298,6 +300,22 @@ class TestListMeters(v2.FunctionalTest,
self.assertEqual('self.sample4', metadata['tag'])
self.assertEqual('prop_value', metadata['properties.prop_1'])
def test_list_meters_with_dict_metadata_with_dot_dollar_in_key(self):
data = self.get_json('/meters/meter.mine',
q=[{'field':
'metadata.properties.prop.3.$sub_prop.2',
'op': 'eq',
'value': 'sub_prop_value2',
}])
self.assertEqual(1, len(data))
self.assertEqual('resource-id4', data[0]['resource_id'])
metadata = data[0]['resource_metadata']
self.assertIsNotNone(metadata)
self.assertEqual('self.sample4', metadata['tag'])
self.assertEqual('prop_value', metadata['properties.prop_1'])
self.assertEqual('sub_prop_value',
metadata['properties.prop_2:sub_prop_1'])
def test_get_one_sample(self):
sample_id = self.messages[1]['message_id']
data = self.get_json('/samples/%s' % sample_id)
@ -347,13 +365,16 @@ class TestListMeters(v2.FunctionalTest,
u'type': u'gauge',
u'unit': u'',
u'source': u'test_source1',
u'metadata': {u'display_name': u'test-server',
u'properties.prop_2:sub_prop_1': u'sub_prop_value',
u'util': u'0.58',
u'tag': u'self.sample4',
u'properties.prop_1': u'prop_value',
u'is_public': u'True',
u'size': u'0'}
u'metadata': {
u'display_name': u'test-server',
u'properties.prop_2:sub_prop_1': u'sub_prop_value',
u'util': u'0.58',
u'tag': u'self.sample4',
u'properties.prop_1': u'prop_value',
u'is_public': u'True',
u'size': u'0',
u'properties.prop:3:$sub_prop:2': u'sub_prop_value2',
}
}], data)
def test_list_meters_metadata_query(self):

View File

@ -461,7 +461,7 @@ class TestListResources(v2.FunctionalTest,
timestamp=datetime.datetime(2012, 7, 2, 10, 40),
resource_metadata={'display_name': 'test-server',
'tag': 'self.sample',
'dict_properties': {'key': 'value'},
'dict_properties': {'key.$1': {'$key': 'val'}},
'not_ignored_list': ['returned'],
},
source='test',
@ -474,7 +474,7 @@ class TestListResources(v2.FunctionalTest,
data = self.get_json('/resources')
metadata = data[0]['metadata']
self.assertEqual([(u'dict_properties.key', u'value'),
self.assertEqual([(u'dict_properties.key:$1:$key', u'val'),
(u'display_name', u'test-server'),
(u'not_ignored_list', u"['returned']"),
(u'tag', u'self.sample')],

View File

@ -126,13 +126,26 @@ class DBTestBase(tests_db.TestBase):
class ResourceTest(DBTestBase,
tests_db.MixinTestsWithBackendScenarios):
def prepare_data(self):
super(ResourceTest, self).prepare_data()
self.msgs.append(self.create_and_store_sample(
timestamp=datetime.datetime(2012, 7, 2, 10, 39),
user_id='mongodb_test',
resource_id='resource-id-mongo_bad_key',
project_id='project-id-test',
metadata={'display.name': {'name.$1': 'test-server1',
'$name_2': 'test-server2'},
'tag': 'self.counter'},
source='test-4'
))
def test_get_resources(self):
expected_first_sample_timestamp = datetime.datetime(2012, 7, 2, 10, 39)
expected_last_sample_timestamp = datetime.datetime(2012, 7, 2, 10, 40)
msgs_sources = [msg['source'] for msg in self.msgs]
resources = list(self.conn.get_resources())
self.assertEqual(9, len(resources))
self.assertEqual(10, len(resources))
for resource in resources:
if resource.resource_id != 'resource-id':
continue
@ -172,7 +185,8 @@ class ResourceTest(DBTestBase,
def test_get_resources_end_timestamp(self):
timestamp = datetime.datetime(2012, 7, 2, 10, 42)
expected = set(['resource-id', 'resource-id-alternate',
'resource-id-5', 'resource-id-7'])
'resource-id-5', 'resource-id-7',
'resource-id-mongo_bad_key'])
resources = list(self.conn.get_resources(end_timestamp=timestamp))
resource_ids = [r.resource_id for r in resources]
@ -262,9 +276,15 @@ class ResourceTest(DBTestBase,
resources = list(self.conn.get_resources(metaquery=q))
self.assertEqual(9, len(resources))
def test_get_resources_by_metaquery_key_with_dot_in_metadata(self):
q = {'metadata.display.name.$name_2': 'test-server2',
'metadata.display.name.name.$1': 'test-server1'}
resources = list(self.conn.get_resources(metaquery=q))
self.assertEqual(1, len(resources))
def test_get_resources_by_empty_metaquery(self):
resources = list(self.conn.get_resources(metaquery={}))
self.assertEqual(9, len(resources))
self.assertEqual(10, len(resources))
def test_get_resources_most_recent_metadata_all(self):
resources = self.conn.get_resources()
@ -459,6 +479,20 @@ class MeterTestPagination(DBTestBase,
class RawSampleTest(DBTestBase,
tests_db.MixinTestsWithBackendScenarios):
def prepare_data(self):
super(RawSampleTest, self).prepare_data()
self.msgs.append(self.create_and_store_sample(
timestamp=datetime.datetime(2012, 7, 2, 10, 39),
user_id='mongodb_test',
resource_id='resource-id-mongo_bad_key',
project_id='project-id-test',
metadata={'display.name': {'name.$1': 'test-server1',
'$name_2': 'test-server2'},
'tag': 'self.counter'},
source='test-4'
))
def test_get_samples_limit_zero(self):
f = storage.SampleFilter()
results = list(self.conn.get_samples(f, limit=0))
@ -530,6 +564,14 @@ class RawSampleTest(DBTestBase,
del d['recorded_at']
self.assertIn(d, self.msgs)
def test_get_samples_by_metaquery_key_with_dot_in_metadata(self):
q = {'metadata.display.name.name.$1': 'test-server1',
'metadata.display.name.$name_2': 'test-server2'}
f = storage.SampleFilter(metaquery=q)
results = list(self.conn.get_samples(f))
self.assertIsNotNone(results)
self.assertEqual(1, len(results))
def test_get_samples_by_start_time(self):
timestamp = datetime.datetime(2012, 7, 2, 10, 41)
f = storage.SampleFilter(
@ -643,9 +685,9 @@ class RawSampleTest(DBTestBase,
self.conn.clear_expired_metering_data(3 * 60)
f = storage.SampleFilter(meter='instance')
results = list(self.conn.get_samples(f))
self.assertEqual(11, len(results))
self.assertEqual(12, len(results))
results = list(self.conn.get_resources())
self.assertEqual(9, len(results))
self.assertEqual(10, len(results))
@tests_db.run_with('sqlite', 'hbase', 'db2')
def test_clear_metering_data_with_alarms(self):