Browse Source
The sync includes a change that makes sure we get unicode-only dicts from jsonutils, no matter which json module implementation is selected.

The latest commit in oslo-incubator:
- 0f4586c0076183c6356eec682c8a593648125abd

The sync adds a new 'strutils' module that is now used in jsonutils.

Change-Id: Ic815ca3df94c33edec9104172048b2cd94b92e3f
Closes-Bug: 1314129
changes/95/95695/3
4 changed files with 365 additions and 73 deletions
@ -0,0 +1,239 @@
|
||||
# Copyright 2011 OpenStack Foundation. |
||||
# All Rights Reserved. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may |
||||
# not use this file except in compliance with the License. You may obtain |
||||
# a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
||||
# License for the specific language governing permissions and limitations |
||||
# under the License. |
||||
|
||||
""" |
||||
System-level utilities and helper functions. |
||||
""" |
||||
|
||||
import math |
||||
import re |
||||
import sys |
||||
import unicodedata |
||||
|
||||
import six |
||||
|
||||
from neutron.openstack.common.gettextutils import _ |
||||
|
||||
|
||||
# Power to which the unit-system base is raised for each size prefix
# (e.g. 'Ki' -> base ** 1, 'G' -> base ** 3).
UNIT_PREFIX_EXPONENT = {
    'k': 1, 'K': 1, 'Ki': 1,
    'M': 2, 'Mi': 2,
    'G': 3, 'Gi': 3,
    'T': 4, 'Ti': 4,
}

# Per unit system: (base, compiled pattern). The pattern captures, in order,
# the signed magnitude, the optional prefix and the unit (bits or bytes).
UNIT_SYSTEM_INFO = {
    'IEC': (
        1024,
        re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$'),
    ),
    'SI': (
        1000,
        re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$'),
    ),
}

# Lower-case spellings recognized as boolean true / false.
TRUE_STRINGS = (
    '1', 't', 'true', 'on', 'y', 'yes',
)
FALSE_STRINGS = (
    '0', 'f', 'false', 'off', 'n', 'no',
)

# Slug helpers: first drop everything that is not a word character,
# whitespace or hyphen; then collapse hyphen/whitespace runs.
SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")
||||
|
||||
|
||||
def int_from_bool_as_string(subject):
    """Interpret a string as a boolean and return either 1 or 0.

    The value is evaluated by `bool_from_string` with its default,
    non-strict settings. Any value whose stripped, case-insensitive
    text is one of:

        ('1', 't', 'true', 'on', 'y', 'yes')

    is interpreted as a boolean True and yields 1; everything else
    yields 0.

    Useful for JSON-decoded stuff and config file parsing

    :param subject: Value to interpret
    :returns: 1 if `subject` is recognized as true, otherwise 0
    """
    # A conditional expression states the intent directly, unlike the
    # fragile `cond and 1 or 0` idiom.
    return 1 if bool_from_string(subject) else 0
||||
|
||||
|
||||
def bool_from_string(subject, strict=False, default=False):
    """Interpret a string as a boolean.

    Matching is case-insensitive and ignores surrounding whitespace.
    't', 'true', 'on', 'y', 'yes' and '1' yield True; 'f', 'false', 'off',
    'n', 'no' and '0' yield False. Non-string subjects are converted to
    text before matching.

    Useful for JSON-decoded stuff and config file parsing.

    :param subject: Value to interpret
    :param strict: If True, an unrecognized value (including None) raises
                   ValueError, which is useful when parsing values passed
                   in from an API call. If False, `default` is returned
                   instead.
    :param default: Result for unrecognized values when `strict` is False
    :returns: True, False or `default`
    :raises ValueError: If `strict` is True and the value is unrecognized
    """
    text = subject
    if not isinstance(text, six.string_types):
        text = six.text_type(text)

    candidate = text.strip().lower()
    if candidate in TRUE_STRINGS:
        return True
    if candidate in FALSE_STRINGS:
        return False
    if not strict:
        return default

    # Strict mode: report every accepted spelling in the error message.
    choices = sorted(TRUE_STRINGS + FALSE_STRINGS)
    acceptable = ', '.join("'%s'" % choice for choice in choices)
    msg = _("Unrecognized value '%(val)s', acceptable values are:"
            " %(acceptable)s") % {'val': text,
                                  'acceptable': acceptable}
    raise ValueError(msg)
||||
|
||||
|
||||
def safe_decode(text, incoming=None, errors='strict'):
    """Decodes incoming text/bytes string using `incoming` if they're not
    already unicode.

    :param text: Text or bytes string to decode
    :param incoming: Text's current encoding. When not given, the encoding
                     of `sys.stdin` is used if available, otherwise
                     python's default encoding.
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a unicode `incoming` encoded
                representation of it.
    :raises TypeError: If text is neither a text nor a bytes string
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be decoded" % type(text))

    if isinstance(text, six.text_type):
        return text

    if not incoming:
        # sys.stdin can be None or replaced by an object without an
        # `encoding` attribute (daemons, WSGI containers, monkey-patched
        # streams), so it must not be dereferenced blindly.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    try:
        return text.decode(incoming, errors)
    except UnicodeDecodeError:
        # Note(flaper87) If we get here, it means that
        # sys.stdin.encoding / sys.getdefaultencoding
        # didn't return a suitable encoding to decode
        # text. This happens mostly when global LANG
        # var is not set correctly and there's no
        # default encoding. In this case, most likely
        # python will use ASCII or ANSI encoders as
        # default encodings but they won't be capable
        # of decoding non-ASCII characters.
        #
        # Also, UTF-8 is being used since it's an ASCII
        # extension.
        return text.decode('utf-8', errors)
||||
|
||||
|
||||
def safe_encode(text, incoming=None,
                encoding='utf-8', errors='strict'):
    """Encodes incoming text/bytes string using `encoding`.

    If incoming is not specified, text is expected to be encoded with
    the encoding of `sys.stdin` when available, otherwise with current
    python's default encoding. (`sys.getdefaultencoding`)

    :param text: Text or bytes string to encode
    :param incoming: Text's current encoding
    :param encoding: Expected encoding for text (Default UTF-8)
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a bytestring `encoding` encoded
                representation of it.
    :raises TypeError: If text is neither a text nor a bytes string
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be encoded" % type(text))

    if not incoming:
        # sys.stdin can be None or replaced by an object without an
        # `encoding` attribute, so it must not be dereferenced blindly.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    # Codec names are case-insensitive; normalize them so that e.g.
    # 'UTF-8' and 'utf-8' are recognized as the same encoding below.
    if hasattr(incoming, 'lower'):
        incoming = incoming.lower()
    if hasattr(encoding, 'lower'):
        encoding = encoding.lower()

    if isinstance(text, six.text_type):
        return text.encode(encoding, errors)
    elif text and encoding != incoming:
        # Decode text before encoding it with `encoding`
        text = safe_decode(text, incoming, errors)
        return text.encode(encoding, errors)
    else:
        # Empty bytes, or bytes already in the target encoding.
        return text
||||
|
||||
|
||||
def string_to_bytes(text, unit_system='IEC', return_int=False):
    """Converts a string into a float representation of bytes.

    The units supported for IEC ::

        Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it)
        KB, KiB, MB, MiB, GB, GiB, TB, TiB

    The units supported for SI ::

        kb(it), Mb(it), Gb(it), Tb(it)
        kB, MB, GB, TB

    Note that the SI unit system does not support capital letter 'K'

    :param text: String input for bytes size conversion.
    :param unit_system: Unit system for byte size conversion.
    :param return_int: If True, returns integer representation of text
                       in bytes, rounded up. (default: decimal)
    :returns: Numerical representation of text in bytes.
    :raises ValueError: If text has an invalid value.

    """
    try:
        base, size_re = UNIT_SYSTEM_INFO[unit_system]
    except KeyError:
        msg = _('Invalid unit system: "%s"') % unit_system
        raise ValueError(msg)

    parsed = size_re.match(text)
    if not parsed:
        msg = _('Invalid string format: %s') % text
        raise ValueError(msg)

    number, unit_prefix, unit = parsed.groups()
    magnitude = float(number)
    if unit in ('b', 'bit'):
        # The quantity was given in bits; convert it to bytes.
        magnitude /= 8

    if unit_prefix:
        result = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix])
    else:
        result = magnitude

    return int(math.ceil(result)) if return_int else result
||||
|
||||
|
||||
def to_slug(value, incoming=None, errors="strict"):
    """Normalize string.

    Lower-cases the text, drops non-word characters and replaces runs of
    whitespace and hyphens with a single hyphen.

    Inspired by Django's `slugify` filter.

    :param value: Text to slugify
    :param incoming: Text's current encoding
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: slugified unicode representation of `value`
    :raises TypeError: If text is not an instance of str
    """
    decoded = safe_decode(value, incoming, errors)
    # NOTE(aababilov): no need to use safe_(encode|decode) here:
    # encodings are always "ascii", error handling is always "ignore"
    # and types are always known (first: unicode; second: str)
    decomposed = unicodedata.normalize("NFKD", decoded)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    cleaned = SLUGIFY_STRIP_RE.sub("", ascii_only).strip().lower()
    return SLUGIFY_HYPHENATE_RE.sub("-", cleaned)
Loading…
Reference in new issue