Merge "proxy-logging: create field for access_user_id"

This commit is contained in:
Zuul
2025-08-26 03:43:46 +00:00
committed by Gerrit Code Review
11 changed files with 186 additions and 7 deletions

View File

@@ -34,6 +34,7 @@ The default log format is::
{status_int} {referer} {user_agent} {auth_token} {bytes_recvd}
{bytes_sent} {client_etag} {transaction_id} {headers} {request_time}
{source} {log_info} {start_time} {end_time} {policy_index}
{access_user_id}
Some keywords, signaled by the (anonymizable) flag, can be anonymized by
using the transformer 'anonymized'. The data are applied the hashing method of
@@ -99,6 +100,12 @@ pid PID of the process emitting the log line.
wire_status_int The status sent to the client, which may be different than
the logged response code if there was an error during the
body of the request or a disconnect.
access_user_id The user ID for logging. Middlewares should set
environ['swift.access_logging']['user_id'] to identify the user
for logging purposes. For S3 API requests, this contains the S3
access key ID. Other auth middlewares should set user-specific
identifiers. For requests without auth middleware support, this
field will be "-".
=================== ==========================================================
In one log line, all of the above fields are space-separated and url-encoded.

View File

@@ -1103,7 +1103,7 @@ use = egg:swift#proxy_logging
# Template used to format access logs. All words surrounded by curly brackets
# will be substituted with the appropriate values. For more information, see
# https://docs.openstack.org/swift/latest/logs.html
# log_msg_template = {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index}
# log_msg_template = {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index} {access_user_id}
# Note: Put before both ratelimit and auth in the pipeline.
[filter:bulk]

View File

@@ -218,6 +218,10 @@ class KeystoneAuth(object):
user_roles = (r.lower() for r in env_identity.get('roles', []))
if self.reseller_admin_role in user_roles:
environ['reseller_request'] = True
# Set access_user_id for consistent logging across auth middlewares
access_logging = environ.setdefault('swift.access_logging', {})
user_id, user_name = env_identity.get('user', (None, None))
access_logging['user_id'] = user_id or user_name
else:
self.logger.debug('Authorizing as anonymous')
environ['swift.authorize'] = self.authorize_anonymous

View File

@@ -120,7 +120,8 @@ class ProxyLoggingMiddleware(object):
'{path} {protocol} {status_int} {referer} {user_agent} '
'{auth_token} {bytes_recvd} {bytes_sent} {client_etag} '
'{transaction_id} {headers} {request_time} {source} '
'{log_info} {start_time} {end_time} {policy_index}'))
'{log_info} {start_time} {end_time} {policy_index} '
'{access_user_id}'))
# The salt is only used in StrAnonymizer. This class requires bytes,
# convert it now to prevent useless convertion later.
self.anonymization_method = conf.get('log_anonymization_method', 'md5')
@@ -210,6 +211,9 @@ class ProxyLoggingMiddleware(object):
'ttfb': '0.05',
'pid': '42',
'wire_status_int': '200',
'access_user_id': StrAnonymizer('AKIAIOSFODNN7EXAMPLE',
self.anonymization_method,
self.anonymization_salt),
}
try:
self.log_formatter.format(self.log_msg_template, **replacements)
@@ -246,6 +250,15 @@ class ProxyLoggingMiddleware(object):
if any_obscured:
req.params = new_params
def get_access_user_id(self, req):
"""
Get access user ID from request environ.
:param req: swob.Request object for the request
:returns: User ID for logging if available, None otherwise
"""
return req.environ.get('swift.access_logging', {}).get('user_id')
def log_request(self, req, status_int, bytes_received, bytes_sent,
start_time, end_time, resp_headers=None, ttfb=0,
wire_status_int=None):
@@ -333,6 +346,9 @@ class ProxyLoggingMiddleware(object):
'ttfb': ttfb,
'pid': self.pid,
'wire_status_int': wire_status_int or status_int,
'access_user_id': StrAnonymizer(
self.get_access_user_id(req), self.anonymization_method,
self.anonymization_salt),
}
self.access_logger.info(
self.log_formatter.format(self.log_msg_template,

View File

@@ -1099,6 +1099,14 @@ class S3Request(swob.Request):
'string_to_sign': self.sig_checker.string_to_sign,
'check_signature': self.sig_checker.check_signature,
}
# Set the logging field (if not set already)
# Because auth mw to our right will only see a copy of the SwiftRequest
# environ we use a mutable value to back-propagate updates to proxy-log
access_key_value = (self.access_key[:125] + '...'
if len(self.access_key) > 128
else self.access_key)
self.environ.setdefault('swift.access_logging', {}).setdefault(
'user_id', access_key_value)
self.account = None
self.user_id = None
self.policy_index = None

View File

@@ -318,6 +318,8 @@ class TempAuth(object):
env['REMOTE_USER'] = groups
env['swift.authorize'] = self.authorize
env['swift.clean_acl'] = clean_acl
# Set access_user_id for consistent logging across middlewares
env.setdefault('swift.access_logging', {})['user_id'] = user
if '.reseller_admin' in groups:
env['reseller_request'] = True
else:
@@ -879,6 +881,8 @@ class TempAuth(object):
user = wsgi_to_str(user)
key = wsgi_to_str(key)
account_user = account + ':' + user
req.environ.setdefault(
'swift.access_logging', {})['user_id'] = account_user
if account_user not in self.users:
self.logger.increment('token_denied')
return HTTPUnauthorized(request=req, headers=unauthed_headers)

View File

@@ -471,7 +471,8 @@ class TestS3ApiBucketNoACL(BaseS3ApiBucket, S3ApiTestCase):
self.assertEqual(1, len(access_lines))
parts = access_lines[0].split()
self.assertEqual(' '.join(parts[3:7]), 'HEAD /junk HTTP/1.0 200')
self.assertEqual(parts[-1], str(bucket_policy_index))
self.assertEqual(parts[-2], str(bucket_policy_index))
self.assertEqual(parts[-1], 'test:tester') # access_user_id
def test_bucket_HEAD_policy_index_logging(self):
self._do_test_bucket_HEAD_policy_index_logging(0)
@@ -1494,9 +1495,16 @@ class TestS3ApiBucketNoACL(BaseS3ApiBucket, S3ApiTestCase):
environ={'REQUEST_METHOD': 'PUT'},
headers=headers,
body=req_body)
# Test V4 bucket creation and access_user_id setting
status, headers, body = self.call_s3api(req)
self.assertEqual(status.split()[0], '200', body)
# Verify access_user_id is set correctly in environ for V4 bucket
# creation
self.assertEqual(req.environ['swift.access_logging']['user_id'],
'test:tester')
def test_bucket_PUT_v4_with_body_bad_hash(self):
elem = Element('CreateBucketConfiguration')
SubElement(elem, 'LocationConstraint').text = self.s3api.conf.location

View File

@@ -84,7 +84,10 @@ class BaseS3ApiObj(object):
environ={'REQUEST_METHOD': method},
headers={'Authorization': 'AWS test:tester:hmac',
'Date': self.get_date_header()})
self.assertNotIn('swift.access_logging', req.environ)
status, headers, body = self.call_s3api(req)
self.assertEqual(req.environ['swift.access_logging'],
{'user_id': 'test:tester'})
self.assertEqual(status.split()[0], '200')
# we'll want this for logging
self._assert_policy_index(req.headers, headers,
@@ -204,7 +207,8 @@ class BaseS3ApiObj(object):
parts = access_lines[0].split()
self.assertEqual(' '.join(parts[3:7]),
'GET /bucket/object HTTP/1.0 200')
self.assertEqual(parts[-1], str(bucket_policy_index))
self.assertEqual(parts[-2], str(bucket_policy_index))
self.assertEqual(parts[-1], 'test:tester') # access_user_id
def _test_object_HEAD_Range(self, range_value):
req = Request.blank('/bucket/object',
@@ -784,6 +788,8 @@ class BaseS3ApiObj(object):
body=self.object_body)
req.date = datetime.now()
req.content_type = 'text/plain'
# Test V4 signature processing and access_user_id setting
status, headers, body = self.call_s3api(req)
self.assertEqual(status.split()[0], '200')
# Check that s3api returns an etag header.
@@ -796,6 +802,10 @@ class BaseS3ApiObj(object):
self.assertEqual('/v1/AUTH_test/bucket/object',
req.environ.get('swift.backend_path'))
# Verify access_user_id is set correctly in environ for V4 signature
self.assertEqual(req.environ['swift.access_logging']['user_id'],
'test:tester')
def test_object_PUT_v4_bad_hash(self):
orig_app = self.s3api.app
@@ -826,7 +836,10 @@ class BaseS3ApiObj(object):
body=self.object_body)
req.date = datetime.now()
req.content_type = 'text/plain'
self.assertNotIn('swift.access_logging', req.environ)
status, headers, body = self.call_s3api(req)
self.assertEqual(req.environ['swift.access_logging'],
{'user_id': 'test:tester'})
self.assertEqual(status.split()[0], '400')
self.assertEqual(self._get_error_code(body),
'XAmzContentSHA256Mismatch')
@@ -877,6 +890,9 @@ class BaseS3ApiObj(object):
body=self.object_body)
req.date = datetime.now()
req.content_type = 'text/plain'
# Test V4 UNSIGNED-PAYLOAD signature processing and access_user_id
# setting
status, headers, body = self.call_s3api(req)
self.assertEqual(status.split()[0], '200')
# Check that s3api returns an etag header.
@@ -889,6 +905,11 @@ class BaseS3ApiObj(object):
self.assertIn(b'UNSIGNED-PAYLOAD', SigV4Request(
req.environ, self.s3api.conf)._canonical_request())
# Verify access_user_id is set correctly in environ for V4
# UNSIGNED-PAYLOAD
self.assertEqual(req.environ['swift.access_logging']['user_id'],
'test:tester')
def _test_object_PUT_copy(self, head_resp, put_header=None,
src_path='/some/source', timestamp=None):
account = 'test:tester'

View File

@@ -536,7 +536,11 @@ class TestS3ApiMiddleware(S3ApiTestCase):
headers={'Date': self.get_date_header()},
environ={'REQUEST_METHOD': 'GET'})
req.content_type = 'text/plain'
status, headers, body = self.call_s3api(req)
# Test with ProxyLoggingMiddleware for access_user_id logging for
# presigned URLs
app = ProxyLoggingMiddleware(self.s3api, {}, logger=self.logger)
status, headers, body = self.call_app(req, app=app)
self.assertIn('swift.backend_path', req.environ)
self.assertEqual('/v1/AUTH_test/bucket/object',
req.environ['swift.backend_path'])
@@ -545,6 +549,16 @@ class TestS3ApiMiddleware(S3ApiTestCase):
self.assertNotIn('Authorization', headers)
self.assertNotIn('X-Auth-Token', headers)
# Verify access_user_id is logged correctly for V4 presigned URLs
access_lines = self.logger.get_lines_for_level('info')
self.assertEqual(1, len(access_lines))
parts = access_lines[0].split()
# For presigned URLs, the logged path includes query parameters
self.assertTrue(' '.join(parts[3:6]).startswith('GET /bucket/object'))
self.assertEqual(parts[5], 'HTTP/1.0')
self.assertEqual(parts[6], '200')
self.assertEqual(parts[-1], 'test:tester') # access_user_id
def test_signed_urls_v4_bad_credential(self):
def test(credential, message, extra=b''):
req = Request.blank(
@@ -1134,7 +1148,10 @@ class TestS3ApiMiddleware(S3ApiTestCase):
'X-Amz-Content-SHA256': '0' * 64}
req = Request.blank('/bucket/object', environ=environ, headers=headers)
req.content_type = 'text/plain'
status, headers, body = self.call_s3api(req)
# Test with ProxyLoggingMiddleware to verify access_user_id logging
app = ProxyLoggingMiddleware(self.s3api, {}, logger=self.logger)
status, headers, body = self.call_app(req, app=app)
self.assertEqual(status.split()[0], '200', body)
self.assertIn('swift.backend_path', req.environ)
self.assertEqual('/v1/AUTH_test/bucket/object',
@@ -1143,6 +1160,14 @@ class TestS3ApiMiddleware(S3ApiTestCase):
self.assertEqual(authz_header, headers['Authorization'])
self.assertNotIn('X-Auth-Token', headers)
# Verify access_user_id is logged correctly for V4 signature
access_lines = self.logger.get_lines_for_level('info')
self.assertEqual(1, len(access_lines))
parts = access_lines[0].split()
self.assertEqual(' '.join(parts[3:7]),
'GET /bucket/object HTTP/1.0 200')
self.assertEqual(parts[-1], 'test:tester') # access_user_id
def test_signature_v4_no_date(self):
environ = {
'REQUEST_METHOD': 'GET'}
@@ -1752,7 +1777,9 @@ class TestS3ApiMiddleware(S3ApiTestCase):
req.content_type = 'text/plain'
log_conf = {'log_msg_template': '{method} {path} {log_info}'}
app = ProxyLoggingMiddleware(self.s3api, log_conf, self.logger)
self.assertNotIn('swift.access_logging', req.environ)
status, headers, body = self.call_app(req, app=app)
self.assertNotIn('swift.access_logging', req.environ)
self.assertEqual(
['403.AccessDenied.invalid_credential'],
@@ -1763,6 +1790,32 @@ class TestS3ApiMiddleware(S3ApiTestCase):
['HEAD /bucket/object s3:err:AccessDenied.invalid_credential'],
self.logger.get_lines_for_level('info'))
def test_access_user_id_logging(self):
# verify that proxy logging gets access_user_id from S3 requests
environ = {'REQUEST_METHOD': 'GET'}
headers = {
'Authorization': 'AWS test:tester:hmac',
'Date': self.get_date_header()}
req = Request.blank('/bucket/object', environ=environ,
headers=headers)
# Use a log template that includes access_user_id
log_conf = {
'log_msg_template':
'{method} {path} {account} {container} {object} '
'{status_int} {access_user_id}'
}
app = ProxyLoggingMiddleware(self.s3api, log_conf, self.logger)
status, headers, body = self.call_app(req, app=app)
# Should be successful GET
self.assertEqual(status.split()[0], '200')
# Verify the access_user_id is logged correctly
expected_log = ('GET /bucket/object AUTH_test bucket object '
'200 test:tester')
self.assertEqual([expected_log],
self.logger.get_lines_for_level('info'))
if __name__ == '__main__':
unittest.main()

View File

@@ -1906,7 +1906,7 @@ class TestProxyLogging(unittest.TestCase):
resp = app(req.environ, start_response)
resp_body = b''.join(resp)
log_parts = self._log_parts(app)
self.assertEqual(len(log_parts), 21)
self.assertEqual(len(log_parts), 22)
self.assertEqual(log_parts[0], '-')
self.assertEqual(log_parts[1], '-')
self.assertEqual(log_parts[2], '26/Apr/1970/17/46/41')
@@ -1929,6 +1929,7 @@ class TestProxyLogging(unittest.TestCase):
self.assertEqual(log_parts[18], '10000000.000000000')
self.assertEqual(log_parts[19], '10000001.000000000')
self.assertEqual(log_parts[20], '-')
self.assertEqual(log_parts[21], '-')
def test_dual_logging_middlewares(self):
# Since no internal request is being made, outer most proxy logging
@@ -2148,3 +2149,43 @@ class TestProxyLogging(unittest.TestCase):
obscured_test(params, headers, ['param_two', 'param_one'],
['X-Auth-Token', 'X-Other-Header'],
expected_params, expected_headers)
def test_access_user_id_field(self):
"""Test that access_user_id field is logged correctly."""
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
app.access_logger = debug_logger()
req = Request.blank('/', environ={
'REQUEST_METHOD': 'GET',
'swift.access_logging': {'user_id': 'test:tester'},
})
resp = app(req.environ, start_response)
b''.join(resp)
log_parts = self._log_parts(app)
self.assertEqual(log_parts[21], 'test:tester')
# test that user_id is not logged if it is not present
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
app.access_logger = debug_logger()
req = Request.blank('/', environ={
'REQUEST_METHOD': 'GET',
})
resp = app(req.environ, start_response)
b''.join(resp)
log_parts = self._log_parts(app)
self.assertEqual(log_parts[21], '-')
def test_access_user_id_field_with_anonymization(self):
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {
'log_anonymization_salt': 'secret_salt',
'log_msg_template': '{method} {path} {access_user_id.anonymized}'
})
app.access_logger = debug_logger()
req = Request.blank('/', environ={
'REQUEST_METHOD': 'GET',
'swift.access_logging': {'user_id': 'test:tester'},
})
resp = app(req.environ, start_response)
b''.join(resp)
log_parts = self._log_parts(app)
self.assertEqual(log_parts[-1],
'{SMD5}14fe1612c332096e282486e4baa37e63')

View File

@@ -645,6 +645,20 @@ class TestAuth(unittest.TestCase):
auth.DEFAULT_TOKEN_LIFE - 0.5, delta=0.5)
self.assertGreater(len(resp.headers['x-auth-token']), 10)
def test_get_token_sets_access_logging_user_id(self):
# Test that token generation sets access logging user_id
test_auth = auth.filter_factory({'user_ac_user': 'testing'})(FakeApp())
req = self._make_request(
'/auth/v1.0',
headers={'X-Auth-User': 'ac:user', 'X-Auth-Key': 'testing'})
resp = req.get_response(test_auth)
self.assertEqual(resp.status_int, 200)
# Check that access logging user_id was set during token generation
self.assertIn('swift.access_logging', req.environ)
self.assertIn('user_id', req.environ['swift.access_logging'])
self.assertEqual(req.environ['swift.access_logging']['user_id'],
'ac:user')
def test_get_token_memcache_error(self):
test_auth = auth.filter_factory({'user_ac_user': 'testing'})(FakeApp())
req = self._make_request(
@@ -1135,7 +1149,10 @@ class TestAuth(unittest.TestCase):
req = self._make_request(
quoted_acct, headers={'X-Auth-Token': auth_token})
req.environ['swift.cache'] = memcache
self.assertNotIn('swift.access_logging', req.environ)
resp = req.get_response(ath)
self.assertEqual(req.environ['swift.access_logging'],
{'user_id': 't\u00e9st:t\u00e9ster'})
self.assertEqual(204, resp.status_int)
# ...but it also works if you send the account raw