diff --git a/oslo_utils/strutils.py b/oslo_utils/strutils.py
index 782903d0..2851ced8 100644
--- a/oslo_utils/strutils.py
+++ b/oslo_utils/strutils.py
@@ -58,30 +58,37 @@ _SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password',
 # _SANITIZE_KEYS we already have. This way, we only have to add the new key
 # to the list of _SANITIZE_KEYS and we can generate regular expressions
 # for XML and JSON automatically.
-_SANITIZE_PATTERNS_2 = []
-_SANITIZE_PATTERNS_1 = []
+_SANITIZE_PATTERNS_2 = {}
+_SANITIZE_PATTERNS_1 = {}
 
 # NOTE(amrith): Some regular expressions have only one parameter, some
 # have two parameters. Use different lists of patterns here.
 _FORMAT_PATTERNS_1 = [r'(%(key)s\s*[=]\s*)[^\s^\'^\"]+']
-_FORMAT_PATTERNS_2 = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
-                      r'(%(key)s\s+[\"\']).*?([\"\'])',
+_FORMAT_PATTERNS_2 = [r'(%(key)s\s*[=]\s*[\"\'])[^\"\']*([\"\'])',
+                      r'(%(key)s\s+[\"\'])[^\"\']*([\"\'])',
                       r'([-]{2}%(key)s\s+)[^\'^\"^=^\s]+([\s]*)',
-                      r'(<%(key)s>).*?()',
-                      r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
-                      r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
-                      r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?'
-                      '[\'"]).*?([\'"])',
+                      r'(<%(key)s>)[^<]*()',
+                      r'([\"\']%(key)s[\"\']\s*:\s*[\"\'])[^\"\']*([\"\'])',
+                      r'([\'"][^"\']*%(key)s[\'"]\s*:\s*u?[\'"])[^\"\']*'
+                      '([\'"])',
+                      r'([\'"][^\'"]*%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?'
+                      '[\'"])[^\"\']*([\'"])',
                       r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']
 
+# NOTE(dhellmann): Keep a separate list of patterns by key so we only
+# need to apply the substitutions for keys we find using a quick "in"
+# test.
 for key in _SANITIZE_KEYS:
+    _SANITIZE_PATTERNS_1[key] = []
+    _SANITIZE_PATTERNS_2[key] = []
+
     for pattern in _FORMAT_PATTERNS_2:
         reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
-        _SANITIZE_PATTERNS_2.append(reg_ex)
+        _SANITIZE_PATTERNS_2[key].append(reg_ex)
 
     for pattern in _FORMAT_PATTERNS_1:
         reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
-        _SANITIZE_PATTERNS_1.append(reg_ex)
+        _SANITIZE_PATTERNS_1[key].append(reg_ex)
 
 
 def int_from_bool_as_string(subject):
@@ -230,19 +237,18 @@ def mask_password(message, secret="***"):
         # byte string. A better solution will be provided in Kilo.
         pass
 
+    substitute1 = r'\g<1>' + secret
+    substitute2 = r'\g<1>' + secret + r'\g<2>'
+
     # NOTE(ldbragst): Check to see if anything in message contains any key
     # specified in _SANITIZE_KEYS, if not then just return the message since
     # we don't have to mask any passwords.
-    if not any(key in message for key in _SANITIZE_KEYS):
-        return message
-
-    substitute = r'\g<1>' + secret + r'\g<2>'
-    for pattern in _SANITIZE_PATTERNS_2:
-        message = re.sub(pattern, substitute, message)
-
-    substitute = r'\g<1>' + secret
-    for pattern in _SANITIZE_PATTERNS_1:
-        message = re.sub(pattern, substitute, message)
+    for key in _SANITIZE_KEYS:
+        if key in message:
+            for pattern in _SANITIZE_PATTERNS_2[key]:
+                message = re.sub(pattern, substitute2, message)
+            for pattern in _SANITIZE_PATTERNS_1[key]:
+                message = re.sub(pattern, substitute1, message)
 
     return message
 
diff --git a/tools/perf_test_mask_password.py b/tools/perf_test_mask_password.py
new file mode 100644
index 00000000..f69a734d
--- /dev/null
+++ b/tools/perf_test_mask_password.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""Performance tests for mask_password.
+
+Times each two-group 'admin_pass' sanitize pattern on its own, then one
+full strutils.mask_password() call, against the payload read from the
+file named by ``infile``. Every timing is a single run, in seconds.
+"""
+
+from __future__ import print_function
+
+import re
+import timeit
+
+from oslo_utils import strutils
+
+# A moderately sized input (~50K) string
+# http://paste.openstack.org/raw/155864/
+# infile = '155864.txt'
+
+# Untruncated version of the above (~310K)
+# http://dl.sileht.net/public/payload.json.gz
+infile = 'large_json_payload.txt'
+
+with open(infile, 'r') as f:
+    input_str = f.read()
+print('payload has %d bytes' % len(input_str))
+
+# NOTE: The payload is handed to the timed callables as an object rather
+# than pasted into generated source text; pasting would rewrite any
+# backslash escapes in the payload and break on an embedded ''' sequence.
+for pattern in strutils._SANITIZE_PATTERNS_2['admin_pass']:
+    print('\ntesting %s' % pattern.pattern)
+    # Bind the current pattern as a default so the callable does not
+    # depend on the loop variable.
+    t = timeit.Timer(
+        lambda pattern=pattern: re.sub(pattern, r'\g<1>***\g<2>', input_str))
+    print(t.timeit(1))
+
+t = timeit.Timer(lambda: strutils.mask_password(input_str))
+print(t.timeit(1))