Use sort_keys with json almost everywhere we write to ZK

For almost all data we write to ZK (except for long-standing nodepool
classes), add sort_keys=True so that we can more easily determine
whether an update is required.

This is in service of zkobject and is not strictly necessary, because
the json module follows dict insertion order and our serialize methods
are internally consistent (at least insofar as they produce the same
data, which is all we care about).  However, that hasn't always been
true and might not be true in the future, so this is good future-proofing.

Based on a similar thought, the argument is also added to several places
which do not use zkobject but which do write to ZK, in case we perform
a similar check in the future.  This seems like a good habit to use
throughout the code base.

Change-Id: Idca67942c057ab0e6b629b50b9b3367ccc0e4ad7
This commit is contained in:
James E. Blair 2021-11-12 15:43:24 -08:00
parent f97a6f627e
commit b7e2e49f7f
13 changed files with 43 additions and 38 deletions

View File

@ -1226,7 +1226,8 @@ class AnsibleJob(object):
result=None,
error_detail=f'Failed to update project '
f'{task.project_name}')
self.job.sendWorkComplete(json.dumps(result))
self.job.sendWorkComplete(
json.dumps(result, sort_keys=True))
return
raise ExecutorError(

View File

@ -70,7 +70,7 @@ class KeyStorage(ZooKeeperBase):
if not path.startswith('/keystorage'):
self.log.error(f"Invalid path: {path}")
return
data = json.dumps(data).encode('utf8')
data = json.dumps(data, sort_keys=True).encode('utf8')
try:
self.kazoo_client.create(path, value=data, makepath=True)
self.log.info(f"Created key at {path}")
@ -124,7 +124,7 @@ class KeyStorage(ZooKeeperBase):
def saveProjectSSHKeys(self, connection_name, project_name, keydata):
"""Store the complete internal data structure"""
key_path = self.getSSHKeysPath(connection_name, project_name)
data = json.dumps(keydata).encode("utf-8")
data = json.dumps(keydata, sort_keys=True).encode("utf-8")
self.kazoo_client.create(key_path, value=data, makepath=True)
def deleteProjectSSHKeys(self, connection_name, project_name):
@ -207,7 +207,7 @@ class KeyStorage(ZooKeeperBase):
"""Store the complete internal data structure"""
key_path = self.getProjectSecretsKeysPath(
connection_name, project_name)
data = json.dumps(keydata).encode("utf-8")
data = json.dumps(keydata, sort_keys=True).encode("utf-8")
self.kazoo_client.create(key_path, value=data, makepath=True)
def deleteProjectsSecretsKeys(self, connection_name, project_name):

View File

@ -346,7 +346,7 @@ class BaseMergeServer(metaclass=ABCMeta):
if result is None:
result = {}
payload = json.dumps(result)
payload = json.dumps(result, sort_keys=True)
self.log.debug("Completed %s job %s: payload size: %s",
merge_request.job_type, merge_request.uuid,
sys.getsizeof(payload))

View File

@ -274,7 +274,7 @@ class ConfigurationErrorList(zkobject.ShardedZKObject):
data = {
"errors": [e.serialize() for e in self.errors],
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -657,7 +657,7 @@ class PipelineState(zkobject.ZKObject):
"queues": [q.getPath() for q in self.queues],
"old_queues": [q.getPath() for q in self.old_queues],
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -784,7 +784,7 @@ class PipelineChangeList(zkobject.ZKObject):
data = {
"changes": self.changes,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, data, context):
data = super().deserialize(data, context)
@ -827,7 +827,7 @@ class PipelineSummary(zkobject.ShardedZKObject):
data = {
"status": self.status,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def refresh(self, context):
# Ignore exceptions and just re-use the previous state. This
@ -894,7 +894,7 @@ class ChangeQueue(zkobject.ZKObject):
"window_decrease_factor": self.window_decrease_factor,
"dynamic": self.dynamic,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -1881,7 +1881,7 @@ class JobData(zkobject.ShardedZKObject):
def getHash(data):
hasher = hashlib.sha256()
# Use json_dumps to strip any ZuulMark entries
hasher.update(json_dumps(data).encode('utf8'))
hasher.update(json_dumps(data, sort_keys=True).encode('utf8'))
return hasher.hexdigest()
def serialize(self):
@ -1890,7 +1890,7 @@ class JobData(zkobject.ShardedZKObject):
"hash": self.hash,
"_path": self._path,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
class FrozenJob(zkobject.ZKObject):
@ -1959,7 +1959,8 @@ class FrozenJob(zkobject.ZKObject):
if v:
# If the value is long, we need to make this a JobData;
# otherwise we can use the dict as-is.
if len(json_dumps(v).encode('utf8')) > klass.MAX_DATA_LEN:
if (len(json_dumps(v, sort_keys=True).encode('utf8')) >
klass.MAX_DATA_LEN):
job_data_vars[k] = v
v = None
kw['_' + k] = v
@ -2017,7 +2018,7 @@ class FrozenJob(zkobject.ZKObject):
data[k] = v
# Use json_dumps to strip any ZuulMark entries
return json_dumps(data).encode("utf8")
return json_dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -2176,7 +2177,8 @@ class FrozenJob(zkobject.ZKObject):
def _makeJobData(self, context, name, data):
# If the data is large, store it in another object
if len(json_dumps(data).encode('utf8')) > self.MAX_DATA_LEN:
if (len(json_dumps(data, sort_keys=True).encode('utf8')) >
self.MAX_DATA_LEN):
return JobData.new(
context, _path=self.getPath() + '/' + name,
data=data)
@ -3265,7 +3267,7 @@ class ResultData(zkobject.ShardedZKObject):
"data": self.data,
"_path": self._path,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
class Build(zkobject.ZKObject):
@ -3322,7 +3324,7 @@ class Build(zkobject.ZKObject):
"zuul_event_id": self.zuul_event_id,
"build_request_ref": self.build_request_ref,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -3430,7 +3432,7 @@ class RepoFiles(zkobject.ShardedZKObject):
"connections": self.connections,
"_buildset_path": self._buildset_path,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
class BaseRepoState(zkobject.ShardedZKObject):
@ -3466,7 +3468,7 @@ class BaseRepoState(zkobject.ShardedZKObject):
"state": self.state,
"_buildset_path": self._buildset_path,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
class MergeRepoState(BaseRepoState):
@ -3640,7 +3642,7 @@ class BuildSet(zkobject.ZKObject):
if self.job_graph else None),
# jobs (serialize as separate objects)
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -4001,7 +4003,7 @@ class QueueItem(zkobject.ZKObject):
"bundle": self.bundle and self.bundle.serialize(),
"dequeued_bundle_failing": self.dequeued_bundle_failing,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
@ -7612,7 +7614,7 @@ class HoldRequest(object):
Used for storing the object data in ZooKeeper.
'''
return json.dumps(self.toDict()).encode('utf8')
return json.dumps(self.toDict(), sort_keys=True).encode('utf8')
# AuthZ models

View File

@ -64,7 +64,7 @@ class BranchCacheZKObject(ShardedZKObject):
"protected": self.protected,
"remainder": self.remainder,
}
return json.dumps(data).encode("utf8")
return json.dumps(data, sort_keys=True).encode("utf8")
def _save(self, context, data, create=False):
super()._save(context, data, create)

View File

@ -81,7 +81,7 @@ class ChangeKey:
revision=revision,
)
self.reference = json.dumps(reference)
self.reference = json.dumps(reference, sort_keys=True)
msg = self.reference.encode('utf8')
self._hash = hashlib.sha256(msg).hexdigest()
@ -319,7 +319,7 @@ class AbstractChangeCache(ZooKeeperSimpleBase, Iterable, abc.ABC):
def set(self, key, change, version=-1):
data = self._dataFromChange(change)
raw_data = json.dumps(data).encode("utf8")
raw_data = json.dumps(data, sort_keys=True).encode("utf8")
data_uuid = self._setData(raw_data)
# Add the change_key info here mostly for debugging since the
@ -327,7 +327,7 @@ class AbstractChangeCache(ZooKeeperSimpleBase, Iterable, abc.ABC):
cache_data = json.dumps(dict(
data_uuid=data_uuid,
key_reference=key.reference,
))
), sort_keys=True)
cache_path = self._cachePath(key._hash)
with self._change_locks[key._hash]:
try:

View File

@ -90,7 +90,7 @@ class BaseComponent(ZooKeeperBase):
return
# Update the ZooKeeper node
content = json.dumps(self.content).encode("utf-8")
content = json.dumps(self.content, sort_keys=True).encode("utf-8")
try:
zstat = self.kazoo_client.set(
self.path, content, version=self._zstat.version
@ -105,7 +105,7 @@ class BaseComponent(ZooKeeperBase):
self.log.info("Registering component in ZooKeeper %s", path)
self.path, self._zstat = self.kazoo_client.create(
path,
json.dumps(self.content).encode("utf-8"),
json.dumps(self.content, sort_keys=True).encode("utf-8"),
makepath=True,
ephemeral=True,
sequence=True,

View File

@ -53,7 +53,7 @@ class FilesCache(ZooKeeperSimpleBase, MutableMapping):
"extra_dirs_searched": list(extra_config_dirs),
"ltime": ltime,
}
payload = json.dumps(data).encode("utf8")
payload = json.dumps(data, sort_keys=True).encode("utf8")
try:
self.kazoo_client.set(self.root_path, payload)
except NoNodeError:
@ -236,6 +236,6 @@ class SystemConfigCache(ZooKeeperSimpleBase):
self.kazoo_client, self.conf_path
) as stream:
stream.truncate(0)
stream.write(json.dumps(data).encode("utf8"))
stream.write(json.dumps(data, sort_keys=True).encode("utf8"))
zstat = self.kazoo_client.exists(self.conf_path)
unparsed_abide.ltime = zstat.last_modified_transaction_id

View File

@ -252,17 +252,18 @@ class ZooKeeperEventQueue(ZooKeeperSimpleBase, Iterable):
event_path = f"{self.event_root}/"
side_channel_data = None
encoded_data = json.dumps(data).encode("utf-8")
encoded_data = json.dumps(data, sort_keys=True).encode("utf-8")
if (len(encoded_data) > sharding.NODE_BYTE_SIZE_LIMIT
and 'event_data' in data):
# Get a unique data node
data_id = str(uuid.uuid4())
data_root = f'{self.data_root}/{data_id}'
side_channel_data = json.dumps(data['event_data']).encode("utf-8")
side_channel_data = json.dumps(data['event_data'],
sort_keys=True).encode("utf-8")
data = data.copy()
del data['event_data']
data['event_data_path'] = data_root
encoded_data = json.dumps(data).encode("utf-8")
encoded_data = json.dumps(data, sort_keys=True).encode("utf-8")
with sharding.BufferedShardWriter(
self.kazoo_client, data_root) as stream:
@ -590,7 +591,7 @@ class ManagementEventQueue(ZooKeeperEventQueue):
try:
self.kazoo_client.set(
event.result_ref,
json.dumps(result_data).encode("utf-8"),
json.dumps(result_data, sort_keys=True).encode("utf-8"),
)
except NoNodeError:
self.log.warning(f"No result node found for {event}; "

View File

@ -562,7 +562,7 @@ class JobRequestQueue(ZooKeeperSimpleBase):
@staticmethod
def _dictToBytes(data):
# The custom json_dumps() will also serialize MappingProxyType objects
return json_dumps(data).encode("utf-8")
return json_dumps(data, sort_keys=True).encode("utf-8")
def _getParamsPath(self, uuid):
return '/'.join([self.PARAM_ROOT, uuid])

View File

@ -126,7 +126,7 @@ class LayoutStateStore(ZooKeeperBase, MutableMapping):
def __setitem__(self, tenant_name, state):
path = f"{self.layout_root}/{tenant_name}"
self.kazoo_client.ensure_path(path)
data = json.dumps(state.toDict()).encode("utf-8")
data = json.dumps(state.toDict(), sort_keys=True).encode("utf-8")
zstat = self.kazoo_client.set(path, data)
# Set correct ltime of the layout in Zookeeper
state.ltime = zstat.last_modified_transaction_id

View File

@ -30,7 +30,7 @@ def holdersFromData(data):
def holdersToData(holders):
return json.dumps(holders).encode("utf8")
return json.dumps(holders, sort_keys=True).encode("utf8")
class SemaphoreHandler(ZooKeeperSimpleBase):

View File

@ -39,7 +39,8 @@ class ZuulSystem(ZooKeeperBase):
data, stat = self.kazoo_client.get(SYSTEM_ROOT)
except NoNodeError:
system_id = uuid.uuid4().hex
data = json.dumps({'system_id': system_id}).encode('utf8')
data = json.dumps({'system_id': system_id},
sort_keys=True).encode('utf8')
try:
self.kazoo_client.create(SYSTEM_ROOT, data)
except NodeExistsError: