From b973f522b874dd88e19eb8fe15e4a6ce492d7e78 Mon Sep 17 00:00:00 2001 From: Kazuhiro MIYAHARA Date: Fri, 4 Sep 2015 15:38:44 +0900 Subject: [PATCH] Fix parsing request path with slash delimiters This patch fixes access_processor so that it parses the request path correctly when the request includes a query string containing slash delimiters. For example, the existing access_processor parses a log line with the request path "/v1/acc/con?prefix=YYYY/MM/DD" as follows: account = "acc" container_name = "con" object_name = "MM/DD" This happens because the existing access_processor strips the query from the request path only *AFTER* splitting the path into account, container_name and object_name. This patch strips the query from the request path before the path is split. --- slogging/access_processor.py | 16 ++++------- test_slogging/unit/test_access_processor.py | 32 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/slogging/access_processor.py b/slogging/access_processor.py index 5b32c79..4850ff5 100644 --- a/slogging/access_processor.py +++ b/slogging/access_processor.py @@ -20,6 +20,7 @@ from tzlocal import get_localzone from datetime import datetime from slogging import common import pytz +from urlparse import urlparse # conditionalize the return_ips method based on whether or not iptools # is present in the system. Without iptools, you will lack CIDR support. 
@@ -98,6 +99,9 @@ class AccessLogProcessor(object): {'found': server, 'expected': self.server_name}) return {} try: + parsed_url = urlparse(request) + request = parsed_url.path + query = parsed_url.query (version, account, container_name, object_name) = \ split_path(request, 2, 4, True) except ValueError, e: @@ -112,15 +116,9 @@ class AccessLogProcessor(object): self.logger.debug(_('Unexpected Swift version string: found ' \ '"%s" expected "v1"') % version) return {} - if container_name is not None: - container_name = container_name.split('?', 1)[0] - if object_name is not None: - object_name = object_name.split('?', 1)[0] - account = account.split('?', 1)[0] - query = None - if '?' in request: - request, query = request.split('?', 1) + if query != "": args = query.split('&') + d['query'] = query # Count each query argument. This is used later to aggregate # the number of format, prefix, etc. queries. for q in args: @@ -138,8 +136,6 @@ class AccessLogProcessor(object): d['lb_ip'] = lb_ip d['method'] = method d['request'] = request - if query: - d['query'] = query d['http_version'] = http_version d['code'] = code d['referrer'] = referrer diff --git a/test_slogging/unit/test_access_processor.py b/test_slogging/unit/test_access_processor.py index 74163b4..04c001a 100644 --- a/test_slogging/unit/test_access_processor.py +++ b/test_slogging/unit/test_access_processor.py @@ -72,6 +72,38 @@ class TestAccessProcessor(unittest.TestCase): expected['query'] = query self.assertEquals(res, expected) + def test_log_line_parser_query_args_with_slash_delimiter_to_container(self): + p = access_processor.AccessLogProcessor({}) + log_line = [str(x) for x in range(18)] + log_line[1] = 'proxy-server' + log_line[4] = '1/Jan/3/4/5/6' + query = 'prefix=YYYY/MM/DD' + log_line[6] = '/v1/a/c?%s' % query + log_line = 'x' * 16 + ' '.join(log_line) + res = p.log_line_parser(log_line) + + self.assertEquals(res['object_name'], None) + self.assertEquals(res['container_name'], 'c') + 
self.assertEquals(res['account'], 'a') + self.assertEquals(res['request'], '/v1/a/c') + self.assertEquals(res['query'], query) + + def test_log_line_parser_query_args_with_slash_delimiter_to_account(self): + p = access_processor.AccessLogProcessor({}) + log_line = [str(x) for x in range(18)] + log_line[1] = 'proxy-server' + log_line[4] = '1/Jan/3/4/5/6' + query = 'prefix=YYYY/MM/DD' + log_line[6] = '/v1/a?%s' % query + log_line = 'x' * 16 + ' '.join(log_line) + res = p.log_line_parser(log_line) + + self.assertEquals(res['object_name'], None) + self.assertEquals(res['container_name'], None) + self.assertEquals(res['account'], 'a') + self.assertEquals(res['request'], '/v1/a') + self.assertEquals(res['query'], query) + def test_log_line_parser_field_count(self): p = access_processor.AccessLogProcessor({}) # too few fields