From 87a5114342f3481539a3dc849c109ebef061a135 Mon Sep 17 00:00:00 2001 From: Ilya Tyaptin Date: Fri, 26 Sep 2014 15:31:21 +0400 Subject: [PATCH] Add encoding of rows and qualifiers in impl_hbase Currently, in impl_hbase we dump only cell values. Respectively if row or qualifier contains non-ascii symbol exception is raised. This patchset adds encoding/decoding for rows and qualifiers. It uses encoding instead of dumping for backward compatibility of rows and cell keys in tables. Change-Id: Id238d38b4ce1f91d9e5e760856e27c5f6eb0685c Closes-bug: #1350826 --- ceilometer/storage/hbase/utils.py | 22 ++++++++++++++----- ceilometer/storage/impl_hbase.py | 4 +++- .../tests/storage/test_storage_scenarios.py | 21 ++++++++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/ceilometer/storage/hbase/utils.py b/ceilometer/storage/hbase/utils.py index b36714e5a7..b3c35cc644 100644 --- a/ceilometer/storage/hbase/utils.py +++ b/ceilometer/storage/hbase/utils.py @@ -364,9 +364,10 @@ def deserialize_entry(entry, get_raw_meta=True): metadata_flattened = {} for k, v in entry.items(): if k.startswith('f:s_'): - sources.append(k[4:]) + sources.append(decode_unicode(k[4:])) elif k.startswith('f:r_metadata.'): - metadata_flattened[k[len('f:r_metadata.'):]] = load(v) + qualifier = decode_unicode(k[len('f:r_metadata.'):]) + metadata_flattened[qualifier] = load(v) elif k.startswith("f:m_"): meter = ([unquote(i) for i in k[4:].split(':')], load(v)) meters.append(meter) @@ -402,17 +403,20 @@ def serialize_entry(data=None, **kwargs): # To make insertion safe we need to store all meters and sources in # a separate cell. For this purpose s_ and m_ prefixes are # introduced. 
- result['f:s_%s' % v] = dump('1') + qualifier = encode_unicode('f:s_%s' % v) + result[qualifier] = dump('1') elif k == 'meter': for meter, ts in v.items(): - result['f:m_%s' % meter] = dump(ts) + qualifier = encode_unicode('f:m_%s' % meter) + result[qualifier] = dump(ts) elif k == 'resource_metadata': # keep raw metadata as well as flattened to provide # capability with API v2. It will be flattened in another # way on API level. But we need flattened too for quick filtering. flattened_meta = dump_metadata(v) for key, m in flattened_meta.items(): - result['f:r_metadata.' + key] = dump(m) + metadata_qualifier = encode_unicode('f:r_metadata.' + key) + result[metadata_qualifier] = dump(m) result['f:resource_metadata'] = dump(v) else: result['f:' + quote(k, ':')] = dump(v) @@ -434,6 +438,14 @@ def load(data): return json.loads(data, object_hook=object_hook) +def encode_unicode(data): + return data.encode('utf-8') if isinstance(data, six.text_type) else data + + +def decode_unicode(data): + return data.decode('utf-8') if isinstance(data, six.string_types) else data + + # We don't want to have tzinfo in decoded json.This object_hook is # overwritten json_util.object_hook for $date def object_hook(dct): diff --git a/ceilometer/storage/impl_hbase.py b/ceilometer/storage/impl_hbase.py index 3cffee80f8..b20a0330a1 100644 --- a/ceilometer/storage/impl_hbase.py +++ b/ceilometer/storage/impl_hbase.py @@ -171,7 +171,8 @@ class Connection(hbase_base.Connection, base.Connection): # automatically 'on the top'. It is needed to keep metadata # up-to-date: metadata from newest samples is considered as actual. 
ts = int(time.mktime(data['timestamp'].timetuple()) * 1000) - resource_table.put(data['resource_id'], resource, ts) + resource_table.put(hbase_utils.encode_unicode(data['resource_id']), + resource, ts) # Rowkey consists of reversed timestamp, meter and a # message signature for purposes of uniqueness @@ -216,6 +217,7 @@ class Connection(hbase_base.Connection, base.Connection): for resource_id, data in resource_table.scan(filter=q): f_res, sources, meters, md = hbase_utils.deserialize_entry( data) + resource_id = hbase_utils.encode_unicode(resource_id) # Unfortunately happybase doesn't keep ordered result from # HBase. So that's why it's needed to find min and max # manually diff --git a/ceilometer/tests/storage/test_storage_scenarios.py b/ceilometer/tests/storage/test_storage_scenarios.py index b6a35af6fa..645cf0721b 100644 --- a/ceilometer/tests/storage/test_storage_scenarios.py +++ b/ceilometer/tests/storage/test_storage_scenarios.py @@ -3227,3 +3227,24 @@ class MongoTimeToLiveTest(DBTestBase, tests_db.MixinTestsWithBackendScenarios): ['resource_ttl']['expireAfterSeconds']) self.assertEqual(15, self.conn.db.meter.index_information() ['meter_ttl']['expireAfterSeconds']) + + +class TestRecordUnicodeSamples(DBTestBase, + tests_db.MixinTestsWithBackendScenarios): + def prepare_data(self): + self.msgs = [] + self.msgs.append(self.create_and_store_sample( + name=u'meter.accent\xe9\u0437', + metadata={u"metadata_key\xe9\u0437": "test", + u"metadata_key": u"test\xe9\u0437"}, + )) + + def test_unicode_sample(self): + f = storage.SampleFilter() + results = list(self.conn.get_samples(f)) + self.assertEqual(1, len(results)) + expected = self.msgs[0] + actual = results[0].as_dict() + self.assertEqual(expected['counter_name'], actual['counter_name']) + self.assertEqual(expected['resource_metadata'], + actual['resource_metadata'])