From 32eaab20b1799807995ce5edc0d7c99355d0aa61 Mon Sep 17 00:00:00 2001 From: Vitaly Bordyug Date: Wed, 16 Jul 2025 20:28:28 +0200 Subject: [PATCH] proxy-logging: create field for access_user_id Added the new field to be able to log the access key during the s3api calls, while reserving the field to be filled with auth relevant information in case of other middlewares. Added respective code to the tempauth and keystone middlewares. Since s3api creates a copy of the environ dict for the downstream request object when translating the s3req.to_swift_req the environ dict that is seen/modifed in other mw module is not the same instance seen in proxy-logging - using mutable objects get transfered into the swift_req.environ. Change the assert in test_proxy_logging from "the last field" to the index 21 in the interests of maintainability. Also added some regression tests for object, bucket and s3 v4 apis and updated the documentation with the details about the new field. Signed-off-by: Vitaly Bordyug Change-Id: I0ce4e92458e2b05a4848cc7675604c1aa2b64d64 --- doc/source/logs.rst | 7 +++ etc/proxy-server.conf-sample | 2 +- swift/common/middleware/keystoneauth.py | 4 ++ swift/common/middleware/proxy_logging.py | 18 +++++- swift/common/middleware/s3api/s3request.py | 8 +++ swift/common/middleware/tempauth.py | 4 ++ .../common/middleware/s3api/test_bucket.py | 10 +++- test/unit/common/middleware/s3api/test_obj.py | 23 +++++++- .../common/middleware/s3api/test_s3api.py | 57 ++++++++++++++++++- .../common/middleware/test_proxy_logging.py | 43 +++++++++++++- test/unit/common/middleware/test_tempauth.py | 17 ++++++ 11 files changed, 186 insertions(+), 7 deletions(-) diff --git a/doc/source/logs.rst b/doc/source/logs.rst index ade72e35c1..16bc6f0da7 100644 --- a/doc/source/logs.rst +++ b/doc/source/logs.rst @@ -34,6 +34,7 @@ The default log format is:: {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index} + {access_user_id} Some keywords, signaled by the (anonymizable) flag, can be anonymized by using the transformer 'anonymized'. The data are applied the hashing method of @@ -99,6 +100,12 @@ pid PID of the process emitting the log line. wire_status_int The status sent to the client, which may be different than the logged response code if there was an error during the body of the request or a disconnect. +access_user_id The user ID for logging. Middlewares should set + environ['swift.access_logging']['user_id'] to identify the user + for logging purposes. For S3 API requests, this contains the S3 + access key ID. Other auth middlewares should set user-specific + identifiers. For requests without auth middleware support, this + field will be "-". =================== ========================================================== In one log line, all of the above fields are space-separated and url-encoded. diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 0cfcf417b7..701b514acf 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -1103,7 +1103,7 @@ use = egg:swift#proxy_logging # Template used to format access logs. All words surrounded by curly brackets # will be substituted with the appropriate values. For more information, see # https://docs.openstack.org/swift/latest/logs.html -# log_msg_template = {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index} +# log_msg_template = {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index} {access_user_id} # Note: Put before both ratelimit and auth in the pipeline. [filter:bulk] diff --git a/swift/common/middleware/keystoneauth.py b/swift/common/middleware/keystoneauth.py index ebd0552c79..ff25b9f347 100644 --- a/swift/common/middleware/keystoneauth.py +++ b/swift/common/middleware/keystoneauth.py @@ -218,6 +218,10 @@ class KeystoneAuth(object): user_roles = (r.lower() for r in env_identity.get('roles', [])) if self.reseller_admin_role in user_roles: environ['reseller_request'] = True + # Set access_user_id for consistent logging across auth middlewares + access_logging = environ.setdefault('swift.access_logging', {}) + user_id, user_name = env_identity.get('user', (None, None)) + access_logging['user_id'] = user_id or user_name else: self.logger.debug('Authorizing as anonymous') environ['swift.authorize'] = self.authorize_anonymous diff --git a/swift/common/middleware/proxy_logging.py b/swift/common/middleware/proxy_logging.py index beb0bffc50..54ac677bd6 100644 --- a/swift/common/middleware/proxy_logging.py +++ b/swift/common/middleware/proxy_logging.py @@ -120,7 +120,8 @@ class ProxyLoggingMiddleware(object): '{path} {protocol} {status_int} {referer} {user_agent} ' '{auth_token} {bytes_recvd} {bytes_sent} {client_etag} ' '{transaction_id} {headers} {request_time} {source} ' - '{log_info} {start_time} {end_time} {policy_index}')) + '{log_info} {start_time} {end_time} {policy_index} ' + '{access_user_id}')) # The salt is only used in StrAnonymizer. This class requires bytes, # convert it now to prevent useless convertion later. self.anonymization_method = conf.get('log_anonymization_method', 'md5') @@ -210,6 +211,9 @@ class ProxyLoggingMiddleware(object): 'ttfb': '0.05', 'pid': '42', 'wire_status_int': '200', + 'access_user_id': StrAnonymizer('AKIAIOSFODNN7EXAMPLE', + self.anonymization_method, + self.anonymization_salt), } try: self.log_formatter.format(self.log_msg_template, **replacements) @@ -246,6 +250,15 @@ class ProxyLoggingMiddleware(object): if any_obscured: req.params = new_params + def get_access_user_id(self, req): + """ + Get access user ID from request environ. + + :param req: swob.Request object for the request + :returns: User ID for logging if available, None otherwise + """ + return req.environ.get('swift.access_logging', {}).get('user_id') + def log_request(self, req, status_int, bytes_received, bytes_sent, start_time, end_time, resp_headers=None, ttfb=0, wire_status_int=None): @@ -333,6 +346,9 @@ class ProxyLoggingMiddleware(object): 'ttfb': ttfb, 'pid': self.pid, 'wire_status_int': wire_status_int or status_int, + 'access_user_id': StrAnonymizer( + self.get_access_user_id(req), self.anonymization_method, + self.anonymization_salt), } self.access_logger.info( self.log_formatter.format(self.log_msg_template, diff --git a/swift/common/middleware/s3api/s3request.py b/swift/common/middleware/s3api/s3request.py index 3bf04e5f8f..7fb4a439bb 100644 --- a/swift/common/middleware/s3api/s3request.py +++ b/swift/common/middleware/s3api/s3request.py @@ -1099,6 +1099,14 @@ class S3Request(swob.Request): 'string_to_sign': self.sig_checker.string_to_sign, 'check_signature': self.sig_checker.check_signature, } + # Set the logging field (if not set already) + # Because auth mw to our right will only see a copy of the SwiftRequest + # environ we use a mutable value to back-propagate updates to proxy-log + access_key_value = (self.access_key[:125] + '...' + if len(self.access_key) > 128 + else self.access_key) + self.environ.setdefault('swift.access_logging', {}).setdefault( + 'user_id', access_key_value) self.account = None self.user_id = None self.policy_index = None diff --git a/swift/common/middleware/tempauth.py b/swift/common/middleware/tempauth.py index 6a0f12c685..4daaac4f4b 100644 --- a/swift/common/middleware/tempauth.py +++ b/swift/common/middleware/tempauth.py @@ -318,6 +318,8 @@ class TempAuth(object): env['REMOTE_USER'] = groups env['swift.authorize'] = self.authorize env['swift.clean_acl'] = clean_acl + # Set access_user_id for consistent logging across middlewares + env.setdefault('swift.access_logging', {})['user_id'] = user if '.reseller_admin' in groups: env['reseller_request'] = True else: @@ -879,6 +881,8 @@ class TempAuth(object): user = wsgi_to_str(user) key = wsgi_to_str(key) account_user = account + ':' + user + req.environ.setdefault( + 'swift.access_logging', {})['user_id'] = account_user if account_user not in self.users: self.logger.increment('token_denied') return HTTPUnauthorized(request=req, headers=unauthed_headers) diff --git a/test/unit/common/middleware/s3api/test_bucket.py b/test/unit/common/middleware/s3api/test_bucket.py index 3625cf3f99..ec2f325e76 100644 --- a/test/unit/common/middleware/s3api/test_bucket.py +++ b/test/unit/common/middleware/s3api/test_bucket.py @@ -471,7 +471,8 @@ class TestS3ApiBucketNoACL(BaseS3ApiBucket, S3ApiTestCase): self.assertEqual(1, len(access_lines)) parts = access_lines[0].split() self.assertEqual(' '.join(parts[3:7]), 'HEAD /junk HTTP/1.0 200') - self.assertEqual(parts[-1], str(bucket_policy_index)) + self.assertEqual(parts[-2], str(bucket_policy_index)) + self.assertEqual(parts[-1], 'test:tester') # access_user_id def test_bucket_HEAD_policy_index_logging(self): self._do_test_bucket_HEAD_policy_index_logging(0) @@ -1494,9 +1495,16 @@ class TestS3ApiBucketNoACL(BaseS3ApiBucket, S3ApiTestCase): environ={'REQUEST_METHOD': 'PUT'}, headers=headers, body=req_body) + + # Test V4 bucket creation and access_user_id setting status, headers, body = self.call_s3api(req) self.assertEqual(status.split()[0], '200', body) + # Verify access_user_id is set correctly in environ for V4 bucket + # creation + self.assertEqual(req.environ['swift.access_logging']['user_id'], + 'test:tester') + def test_bucket_PUT_v4_with_body_bad_hash(self): elem = Element('CreateBucketConfiguration') SubElement(elem, 'LocationConstraint').text = self.s3api.conf.location diff --git a/test/unit/common/middleware/s3api/test_obj.py b/test/unit/common/middleware/s3api/test_obj.py index f9c77de907..dcce3a0905 100644 --- a/test/unit/common/middleware/s3api/test_obj.py +++ b/test/unit/common/middleware/s3api/test_obj.py @@ -84,7 +84,10 @@ class BaseS3ApiObj(object): environ={'REQUEST_METHOD': method}, headers={'Authorization': 'AWS test:tester:hmac', 'Date': self.get_date_header()}) + self.assertNotIn('swift.access_logging', req.environ) status, headers, body = self.call_s3api(req) + self.assertEqual(req.environ['swift.access_logging'], + {'user_id': 'test:tester'}) self.assertEqual(status.split()[0], '200') # we'll want this for logging self._assert_policy_index(req.headers, headers, @@ -204,7 +207,8 @@ class BaseS3ApiObj(object): parts = access_lines[0].split() self.assertEqual(' '.join(parts[3:7]), 'GET /bucket/object HTTP/1.0 200') - self.assertEqual(parts[-1], str(bucket_policy_index)) + self.assertEqual(parts[-2], str(bucket_policy_index)) + self.assertEqual(parts[-1], 'test:tester') # access_user_id def _test_object_HEAD_Range(self, range_value): req = Request.blank('/bucket/object', @@ -784,6 +788,8 @@ class BaseS3ApiObj(object): body=self.object_body) req.date = datetime.now() req.content_type = 'text/plain' + + # Test V4 signature processing and access_user_id setting status, headers, body = self.call_s3api(req) self.assertEqual(status.split()[0], '200') # Check that s3api returns an etag header. @@ -796,6 +802,10 @@ class BaseS3ApiObj(object): self.assertEqual('/v1/AUTH_test/bucket/object', req.environ.get('swift.backend_path')) + # Verify access_user_id is set correctly in environ for V4 signature + self.assertEqual(req.environ['swift.access_logging']['user_id'], + 'test:tester') + def test_object_PUT_v4_bad_hash(self): orig_app = self.s3api.app @@ -826,7 +836,10 @@ class BaseS3ApiObj(object): body=self.object_body) req.date = datetime.now() req.content_type = 'text/plain' + self.assertNotIn('swift.access_logging', req.environ) status, headers, body = self.call_s3api(req) + self.assertEqual(req.environ['swift.access_logging'], + {'user_id': 'test:tester'}) self.assertEqual(status.split()[0], '400') self.assertEqual(self._get_error_code(body), 'XAmzContentSHA256Mismatch') @@ -877,6 +890,9 @@ class BaseS3ApiObj(object): body=self.object_body) req.date = datetime.now() req.content_type = 'text/plain' + + # Test V4 UNSIGNED-PAYLOAD signature processing and access_user_id + # setting status, headers, body = self.call_s3api(req) self.assertEqual(status.split()[0], '200') # Check that s3api returns an etag header. @@ -889,6 +905,11 @@ class BaseS3ApiObj(object): self.assertIn(b'UNSIGNED-PAYLOAD', SigV4Request( req.environ, self.s3api.conf)._canonical_request()) + # Verify access_user_id is set correctly in environ for V4 + # UNSIGNED-PAYLOAD + self.assertEqual(req.environ['swift.access_logging']['user_id'], + 'test:tester') + def _test_object_PUT_copy(self, head_resp, put_header=None, src_path='/some/source', timestamp=None): account = 'test:tester' diff --git a/test/unit/common/middleware/s3api/test_s3api.py b/test/unit/common/middleware/s3api/test_s3api.py index f996326562..88c4895836 100644 --- a/test/unit/common/middleware/s3api/test_s3api.py +++ b/test/unit/common/middleware/s3api/test_s3api.py @@ -536,7 +536,11 @@ class TestS3ApiMiddleware(S3ApiTestCase): headers={'Date': self.get_date_header()}, environ={'REQUEST_METHOD': 'GET'}) req.content_type = 'text/plain' - status, headers, body = self.call_s3api(req) + + # Test with ProxyLoggingMiddleware for access_user_id logging for + # presigned URLs + app = ProxyLoggingMiddleware(self.s3api, {}, logger=self.logger) + status, headers, body = self.call_app(req, app=app) self.assertIn('swift.backend_path', req.environ) self.assertEqual('/v1/AUTH_test/bucket/object', req.environ['swift.backend_path']) @@ -545,6 +549,16 @@ class TestS3ApiMiddleware(S3ApiTestCase): self.assertNotIn('Authorization', headers) self.assertNotIn('X-Auth-Token', headers) + # Verify access_user_id is logged correctly for V4 presigned URLs + access_lines = self.logger.get_lines_for_level('info') + self.assertEqual(1, len(access_lines)) + parts = access_lines[0].split() + # For presigned URLs, the logged path includes query parameters + self.assertTrue(' '.join(parts[3:6]).startswith('GET /bucket/object')) + self.assertEqual(parts[5], 'HTTP/1.0') + self.assertEqual(parts[6], '200') + self.assertEqual(parts[-1], 'test:tester') # access_user_id + def test_signed_urls_v4_bad_credential(self): def test(credential, message, extra=b''): req = Request.blank( @@ -1134,7 +1148,10 @@ class TestS3ApiMiddleware(S3ApiTestCase): 'X-Amz-Content-SHA256': '0' * 64} req = Request.blank('/bucket/object', environ=environ, headers=headers) req.content_type = 'text/plain' - status, headers, body = self.call_s3api(req) + + # Test with ProxyLoggingMiddleware to verify access_user_id logging + app = ProxyLoggingMiddleware(self.s3api, {}, logger=self.logger) + status, headers, body = self.call_app(req, app=app) self.assertEqual(status.split()[0], '200', body) self.assertIn('swift.backend_path', req.environ) self.assertEqual('/v1/AUTH_test/bucket/object', @@ -1143,6 +1160,14 @@ class TestS3ApiMiddleware(S3ApiTestCase): self.assertEqual(authz_header, headers['Authorization']) self.assertNotIn('X-Auth-Token', headers) + # Verify access_user_id is logged correctly for V4 signature + access_lines = self.logger.get_lines_for_level('info') + self.assertEqual(1, len(access_lines)) + parts = access_lines[0].split() + self.assertEqual(' '.join(parts[3:7]), + 'GET /bucket/object HTTP/1.0 200') + self.assertEqual(parts[-1], 'test:tester') # access_user_id + def test_signature_v4_no_date(self): environ = { 'REQUEST_METHOD': 'GET'} @@ -1752,7 +1777,9 @@ class TestS3ApiMiddleware(S3ApiTestCase): req.content_type = 'text/plain' log_conf = {'log_msg_template': '{method} {path} {log_info}'} app = ProxyLoggingMiddleware(self.s3api, log_conf, self.logger) + self.assertNotIn('swift.access_logging', req.environ) status, headers, body = self.call_app(req, app=app) + self.assertNotIn('swift.access_logging', req.environ) self.assertEqual( ['403.AccessDenied.invalid_credential'], @@ -1763,6 +1790,32 @@ class TestS3ApiMiddleware(S3ApiTestCase): ['HEAD /bucket/object s3:err:AccessDenied.invalid_credential'], self.logger.get_lines_for_level('info')) + def test_access_user_id_logging(self): + # verify that proxy logging gets access_user_id from S3 requests + environ = {'REQUEST_METHOD': 'GET'} + headers = { + 'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()} + req = Request.blank('/bucket/object', environ=environ, + headers=headers) + + # Use a log template that includes access_user_id + log_conf = { + 'log_msg_template': + '{method} {path} {account} {container} {object} ' + '{status_int} {access_user_id}' + } + app = ProxyLoggingMiddleware(self.s3api, log_conf, self.logger) + status, headers, body = self.call_app(req, app=app) + + # Should be successful GET + self.assertEqual(status.split()[0], '200') + # Verify the access_user_id is logged correctly + expected_log = ('GET /bucket/object AUTH_test bucket object ' + '200 test:tester') + self.assertEqual([expected_log], + self.logger.get_lines_for_level('info')) + if __name__ == '__main__': unittest.main() diff --git a/test/unit/common/middleware/test_proxy_logging.py b/test/unit/common/middleware/test_proxy_logging.py index 05146e7d08..d6fa90ce56 100644 --- a/test/unit/common/middleware/test_proxy_logging.py +++ b/test/unit/common/middleware/test_proxy_logging.py @@ -1906,7 +1906,7 @@ class TestProxyLogging(unittest.TestCase): resp = app(req.environ, start_response) resp_body = b''.join(resp) log_parts = self._log_parts(app) - self.assertEqual(len(log_parts), 21) + self.assertEqual(len(log_parts), 22) self.assertEqual(log_parts[0], '-') self.assertEqual(log_parts[1], '-') self.assertEqual(log_parts[2], '26/Apr/1970/17/46/41') @@ -1929,6 +1929,7 @@ class TestProxyLogging(unittest.TestCase): self.assertEqual(log_parts[18], '10000000.000000000') self.assertEqual(log_parts[19], '10000001.000000000') self.assertEqual(log_parts[20], '-') + self.assertEqual(log_parts[21], '-') def test_dual_logging_middlewares(self): # Since no internal request is being made, outer most proxy logging @@ -2148,3 +2149,43 @@ class TestProxyLogging(unittest.TestCase): obscured_test(params, headers, ['param_two', 'param_one'], ['X-Auth-Token', 'X-Other-Header'], expected_params, expected_headers) + + def test_access_user_id_field(self): + """Test that access_user_id field is logged correctly.""" + app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {}) + app.access_logger = debug_logger() + req = Request.blank('/', environ={ + 'REQUEST_METHOD': 'GET', + 'swift.access_logging': {'user_id': 'test:tester'}, + }) + resp = app(req.environ, start_response) + b''.join(resp) + log_parts = self._log_parts(app) + self.assertEqual(log_parts[21], 'test:tester') + + # test that user_id is not logged if it is not present + app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {}) + app.access_logger = debug_logger() + req = Request.blank('/', environ={ + 'REQUEST_METHOD': 'GET', + }) + resp = app(req.environ, start_response) + b''.join(resp) + log_parts = self._log_parts(app) + self.assertEqual(log_parts[21], '-') + + def test_access_user_id_field_with_anonymization(self): + app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), { + 'log_anonymization_salt': 'secret_salt', + 'log_msg_template': '{method} {path} {access_user_id.anonymized}' + }) + app.access_logger = debug_logger() + req = Request.blank('/', environ={ + 'REQUEST_METHOD': 'GET', + 'swift.access_logging': {'user_id': 'test:tester'}, + }) + resp = app(req.environ, start_response) + b''.join(resp) + log_parts = self._log_parts(app) + self.assertEqual(log_parts[-1], + '{SMD5}14fe1612c332096e282486e4baa37e63') diff --git a/test/unit/common/middleware/test_tempauth.py b/test/unit/common/middleware/test_tempauth.py index 7fcb4acab3..7d39ee8773 100644 --- a/test/unit/common/middleware/test_tempauth.py +++ b/test/unit/common/middleware/test_tempauth.py @@ -645,6 +645,20 @@ class TestAuth(unittest.TestCase): auth.DEFAULT_TOKEN_LIFE - 0.5, delta=0.5) self.assertGreater(len(resp.headers['x-auth-token']), 10) + def test_get_token_sets_access_logging_user_id(self): + # Test that token generation sets access logging user_id + test_auth = auth.filter_factory({'user_ac_user': 'testing'})(FakeApp()) + req = self._make_request( + '/auth/v1.0', + headers={'X-Auth-User': 'ac:user', 'X-Auth-Key': 'testing'}) + resp = req.get_response(test_auth) + self.assertEqual(resp.status_int, 200) + # Check that access logging user_id was set during token generation + self.assertIn('swift.access_logging', req.environ) + self.assertIn('user_id', req.environ['swift.access_logging']) + self.assertEqual(req.environ['swift.access_logging']['user_id'], + 'ac:user') + def test_get_token_memcache_error(self): test_auth = auth.filter_factory({'user_ac_user': 'testing'})(FakeApp()) req = self._make_request( @@ -1135,7 +1149,10 @@ class TestAuth(unittest.TestCase): req = self._make_request( quoted_acct, headers={'X-Auth-Token': auth_token}) req.environ['swift.cache'] = memcache + self.assertNotIn('swift.access_logging', req.environ) resp = req.get_response(ath) + self.assertEqual(req.environ['swift.access_logging'], + {'user_id': 't\u00e9st:t\u00e9ster'}) self.assertEqual(204, resp.status_int) # ...but it also works if you send the account raw