Deterministic, repeatable serialization for rings.
The (account|container|object).ring.gz files contain, among other things, a JSON-encoded dictionary. This change simply makes the JSON serializer sort the keys of that dictionary so that two Python-identical rings will result in two bytewise-identical ring files.

Also, to get repeatable compression, we lock down the timestamp in the gzip output stream to a fixed value. (There's a timestamp value in a gzip stream header; by default, gzip.GzipFile sticks time.time() in there.)

The fixed timestamp only works on Python 2.7; on 2.6, the 'mtime' argument to gzip.GzipFile() is unsupported. Don't worry, serialization still works on 2.6. It just doesn't always produce the same bytes for the same ring.

Change-Id: Ide446413d0aeb78536883933fd0caf440b8f54ad
This commit is contained in:
parent
85529531d6
commit
156cdc8edf
@ -85,7 +85,8 @@ class RingData(object):
|
||||
# Write out new-style serialization magic and version:
|
||||
file_obj.write(struct.pack('!4sH', 'R1NG', 1))
|
||||
ring = self.to_dict()
|
||||
json_text = json.dumps(
|
||||
json_encoder = json.JSONEncoder(sort_keys=True)
|
||||
json_text = json_encoder.encode(
|
||||
{'devs': ring['devs'], 'part_shift': ring['part_shift'],
|
||||
'replica_count': len(ring['replica2part2dev_id'])})
|
||||
json_len = len(json_text)
|
||||
@ -100,7 +101,16 @@ class RingData(object):
|
||||
|
||||
:param filename: File into which this instance should be serialized.
|
||||
"""
|
||||
gz_file = GzipFile(filename, 'wb')
|
||||
# Override the timestamp so that the same ring data creates
|
||||
# the same bytes on disk. This makes a checksum comparison a
|
||||
# good way to see if two rings are identical.
|
||||
#
|
||||
# This only works on Python 2.7; on 2.6, we always get the
|
||||
# current time in the gzip output.
|
||||
try:
|
||||
gz_file = GzipFile(filename, 'wb', mtime=1300507380.0)
|
||||
except TypeError:
|
||||
gz_file = GzipFile(filename, 'wb')
|
||||
self.serialize_v1(gz_file)
|
||||
gz_file.close()
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
import array
|
||||
import cPickle as pickle
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from gzip import GzipFile
|
||||
from shutil import rmtree
|
||||
@ -67,6 +68,29 @@ class TestRingData(unittest.TestCase):
|
||||
rd2 = ring.RingData.load(ring_fname)
|
||||
self.assert_ring_data_equal(rd, rd2)
|
||||
|
||||
def test_deterministic_serialization(self):
    """
    Two identical rings should produce identical .gz files on disk.

    Only true on Python 2.7 or greater.
    """
    # On Python 2.6 the gzip output always carries the current time,
    # so the bytes are not repeatable and there is nothing to check.
    if sys.version_info < (2, 7):
        return
    os.mkdir(os.path.join(self.testdir, '1'))
    os.mkdir(os.path.join(self.testdir, '2'))
    # These have to have the same filename (not full path,
    # obviously) since the filename gets encoded in the gzip data.
    ring_fname1 = os.path.join(self.testdir, '1', 'the.ring.gz')
    ring_fname2 = os.path.join(self.testdir, '2', 'the.ring.gz')
    rd = ring.RingData(
        [array.array('H', [0, 1, 0, 1]), array.array('H', [0, 1, 0, 1])],
        [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30)
    rd.save(ring_fname1)
    rd.save(ring_fname2)
    # The files are gzip data, so read them in binary mode to compare
    # the exact bytes on disk. The with-blocks stay nested because the
    # combined "with a, b:" form does not parse on Python 2.6.
    with open(ring_fname1, 'rb') as ring1:
        with open(ring_fname2, 'rb') as ring2:
            self.assertEqual(ring1.read(), ring2.read())
|
||||
|
||||
|
||||
class TestRing(unittest.TestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user