Switch to ruamel for yaml handling

The ruamel.yaml library has better support for modifying YAML while
preserving things like comments and key ordering. This updates our
yamlutils module to use ruamel.yaml instead of PyYAML.

Story: 2002908
Task: 22880

Change-Id: I4ac66c9e3e40780b588377c1dfe42511eed231a3
Signed-off-by: Sean McGinnis <sean.mcginnis@gmail.com>
This commit is contained in:
Sean McGinnis 2019-07-08 11:40:11 -05:00
parent 576206353a
commit b6ca269386
No known key found for this signature in database
GPG Key ID: CE7EE4BFAF8D70C8
4 changed files with 16 additions and 111 deletions

View File

@ -7,3 +7,4 @@ icalendar>=3.10 # BSD
whereto>=0.3.0 # Apache-2.0 whereto>=0.3.0 # Apache-2.0
mwclient==0.8.1 mwclient==0.8.1
yamlordereddictloader yamlordereddictloader
ruamel.yaml>=0.15

View File

@ -36,7 +36,7 @@ def get_deliverable_data(series, deliverable):
deliverable_filename = 'deliverables/%s/%s.yaml' % ( deliverable_filename = 'deliverables/%s/%s.yaml' % (
series, deliverable) series, deliverable)
with open(deliverable_filename, 'r', encoding='utf-8') as f: with open(deliverable_filename, 'r', encoding='utf-8') as f:
return yamlutils.loads(f.read()) return yamlutils.loads(f)
def increment_version(old_version, increment): def increment_version(old_version, increment):

View File

@ -12,120 +12,23 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import collections import ruamel.yaml
import re import ruamel.yaml.compat
import six
import yaml
import yamlordereddictloader
_LIKE_A_NUMBER = re.compile('^[0-9]+.[0-9]+$')
def _has_newline(data):
if "\n" in data or "\r" in data:
return True
return False
class PrettySafeDumper(yaml.dumper.SafeDumper):
"""Yaml dumper that tries to not alter original formats (too much)."""
BINARY_ENCODING = 'utf-8'
def represent_ordereddict(self, data):
values = []
node = yaml.nodes.MappingNode(
'tag:yaml.org,2002:map', values, flow_style=None)
for key, value in data.items():
key_item = self.represent_data(key)
value_item = self.represent_data(value)
values.append((key_item, value_item))
return node
def ignore_aliases(self, data):
# Never output alias references; always repeat the data.
return True
def represent_bool(self, data):
if data:
value = 'yes'
else:
value = 'no'
return self.represent_scalar('tag:yaml.org,2002:bool', value)
def choose_scalar_style(self):
# Avoid messing up dict keys...
if self.states[-1] == self.expect_block_mapping_simple_value:
self.event.style = 'plain'
return super(PrettySafeDumper, self).choose_scalar_style()\
if self.event.style != 'plain' else ("'" if ' ' in
self.event.value else None)
def represent_string(self, data):
if isinstance(data, six.binary_type):
data = data.decode(self.BINARY_ENCODING)
style = "plain"
if _has_newline(data):
style = "|"
elif _LIKE_A_NUMBER.match(data):
style = '"'
return yaml.representer.ScalarNode('tag:yaml.org,2002:str',
data, style=style)
def represent_undefined(self, data):
if isinstance(data, collections.OrderedDict):
return self.represent_odict(data)
else:
return super(PrettySafeDumper, self).represent_undefined(data)
# Override this method to always indent. Otherwise when a list is
# emitted, the items nested under it are not and we have inconsistent
# formatting.
# https://stackoverflow.com/questions/25108581/python-yaml-dump-bad-indentation
def increase_indent(self, flow=False, indentless=False):
return super(PrettySafeDumper, self).increase_indent(flow, False)
# NOTE(harlowja): at some point this may not be needed...
# See: http://pyyaml.org/ticket/29
PrettySafeDumper.add_representer(collections.OrderedDict,
PrettySafeDumper.represent_ordereddict)
PrettySafeDumper.add_representer(None,
PrettySafeDumper.represent_undefined)
# NOTE(dhellmann): The representer functions in the base class are
# specified by class.method-name so we have to re-register the
# representer for bool if we want to override it.
PrettySafeDumper.add_representer(bool,
PrettySafeDumper.represent_bool)
# Ensure we use our own routine here, because the style that comes by
# default is sort of wonky and messes up the values....
for str_type in [six.binary_type, six.text_type]:
PrettySafeDumper.add_representer(str_type,
PrettySafeDumper.represent_string)
def dumps(obj): def dumps(obj):
"""Dump a python object -> blob and apply our pretty styling.""" """Dumps yaml content into a string."""
buff = six.StringIO() yaml = ruamel.yaml.YAML()
yaml.dump_all([obj], buff, yaml.width = 66
explicit_start=True,
indent=2, stream = ruamel.yaml.compat.StringIO()
default_flow_style=False, yaml.explicit_start = True
line_break="\n", yaml.dump(obj, stream)
width=66, return stream.getvalue()
Dumper=PrettySafeDumper,
allow_unicode=True)
return buff.getvalue()
def loads(blob): def loads(blob):
"""Load a yaml blob and retain key ordering.""" """Load a yaml blob and retain key ordering."""
# This does use load, which is unsafe, but should be ok yaml = ruamel.yaml.YAML()
# for what we are loading here in this program; we should yaml.version = '1.1'
# be able to fix that in the future (if it matters). return yaml.load(blob)
return yaml.load(blob, Loader=yamlordereddictloader.Loader)

View File

@ -22,6 +22,7 @@ tqdm
mwclient==0.8.1 mwclient==0.8.1
jsonschema>=2.6.0 jsonschema>=2.6.0
twine>=1.13.0 twine>=1.13.0
ruamel.yaml>=0.15
# For release notes generation. # For release notes generation.
Jinja2>=2.6 # BSD License (3 clause) Jinja2>=2.6 # BSD License (3 clause)