Merge "Fix OIDC federation UTF-8 double-encoding of non-ASCII characters"
This commit is contained in:
@@ -444,11 +444,25 @@ def get_assertion_params_from_env():
|
||||
for k, v in list(flask.request.environ.items()):
|
||||
if not k.startswith(prefix):
|
||||
continue
|
||||
# These bytes may be decodable as ISO-8859-1 according to Section
|
||||
# 3.2.4 of RFC 7230. Let's assume that our web server plugins are
|
||||
# correctly encoding the data.
|
||||
if not isinstance(v, str) and getattr(v, 'decode', False):
|
||||
v = v.decode('ISO-8859-1')
|
||||
if isinstance(v, str):
|
||||
# Per Section 3.2.4 of RFC 7230, HTTP header field values use
|
||||
# ISO-8859-1 encoding, and PEP 3333 requires WSGI environ
|
||||
# values to be native strings decoded as Latin-1 accordingly.
|
||||
# However, OIDC IdPs commonly send assertion values encoded as
|
||||
# UTF-8 (e.g. non-ASCII characters like 'ñ' or 'å'). When
|
||||
# mod_wsgi decodes those UTF-8 bytes as Latin-1, the result is
|
||||
# mojibake. We reverse the Latin-1 decode and re-decode as
|
||||
# UTF-8 to recover the original text. If that fails, the value
|
||||
# was legitimately Latin-1 and is kept as-is.
|
||||
try:
|
||||
v = v.encode('ISO-8859-1').decode('utf-8')
|
||||
except (UnicodeDecodeError, UnicodeEncodeError):
|
||||
pass
|
||||
elif getattr(v, 'decode', False):
|
||||
try:
|
||||
v = v.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
v = v.decode('ISO-8859-1')
|
||||
yield (k, v)
|
||||
|
||||
|
||||
|
||||
@@ -997,6 +997,77 @@ class TestUnicodeAssertionData(unit.BaseTestCase):
|
||||
self.assertEqual(full_name, user_name)
|
||||
|
||||
|
||||
class TestWsgiUtf8LatinRoundtrip(unit.BaseTestCase):
    """Check recovery of UTF-8 assertion values mis-decoded as Latin-1.

    Under PEP 3333 a WSGI server hands environ values to the application
    as native strings decoded as Latin-1. If an IdP emitted UTF-8 for a
    non-ASCII character (for instance 'ñ' or 'å'), each UTF-8 byte shows
    up as its own Latin-1 character. These tests feed such values through
    get_assertion_params_from_env() and expect the intended text back,
    while values that were never mangled must come through untouched.

    """

    def setUp(self):
        super().setUp()
        fixture = config_fixture.Config(CONF)
        self.config_fixture = self.useFixture(fixture)
        self.config_fixture.config(group='federation', assertion_prefix='PFX')

    def _get_assertion_via_wsgi(self, environ_overrides):
        """Return the assertion params seen inside a Flask request context.

        ``environ_overrides`` is merged into the test request's WSGI
        environ before the generator under test is drained into a dict.
        """
        request_context = flask.Flask(__name__).test_request_context(
            path='/path', environ_overrides=environ_overrides
        )
        with request_context:
            return dict(mapping_utils.get_assertion_params_from_env())

    def test_utf8_latin1_roundtrip_recovers_unicode(self):
        """Mangled UTF-8 names come back as the intended Unicode text."""
        params = self._get_assertion_via_wsgi(
            mapping_fixtures.WSGI_LATIN1_UTF8_ASSERTION
        )
        expectations = (
            ('PFX_FirstName', 'Jon Kåre'),
            ('PFX_LastName', 'Hellån'),
        )
        for key, wanted in expectations:
            self.assertEqual(wanted, params[key])

    def test_already_correct_unicode_is_preserved(self):
        """Values that are already proper Unicode are left alone."""
        params = self._get_assertion_via_wsgi(
            mapping_fixtures.UNICODE_NAME_ASSERTION
        )
        expectations = (
            ('PFX_FirstName', 'Jon Kåre'),
            ('PFX_LastName', 'Hellån'),
        )
        for key, wanted in expectations:
            self.assertEqual(wanted, params[key])

    def test_ascii_values_are_unaffected(self):
        """Plain ASCII values are returned exactly as supplied."""
        params = self._get_assertion_via_wsgi(
            mapping_fixtures.UNICODE_NAME_ASSERTION
        )
        expectations = (
            ('PFX_Email', 'jon@example.com'),
            ('PFX_UserName', 'jonkare'),
        )
        for key, wanted in expectations:
            self.assertEqual(wanted, params[key])

    def test_oidc_groups_with_special_chars(self):
        """An OIDC groups value containing 'ñ' is decoded correctly."""
        self.config_fixture.config(group='federation', assertion_prefix='OIDC')
        params = self._get_assertion_via_wsgi(
            mapping_fixtures.WSGI_LATIN1_UTF8_GROUPS_ASSERTION
        )
        decoded_groups = params['OIDC-groups']
        self.assertIn('España', decoded_groups)
        # U+00C3 is the first character of the mangled form of 'ñ'; it
        # must be gone once the value has been repaired.
        self.assertNotIn('\u00c3', decoded_groups)

    def test_bytes_value_utf8(self):
        """A bytes value holding valid UTF-8 is decoded as UTF-8."""
        raw_value = 'Espa\u00f1a'.encode('utf-8')
        params = self._get_assertion_via_wsgi({'PFX_Name': raw_value})
        self.assertEqual('España', params['PFX_Name'])

    def test_bytes_value_latin1_fallback(self):
        """A bytes value that is not valid UTF-8 is read as ISO-8859-1."""
        raw_value = 'Espa\u00f1a'.encode('ISO-8859-1')
        params = self._get_assertion_via_wsgi({'PFX_Name': raw_value})
        self.assertEqual('España', params['PFX_Name'])
|
||||
|
||||
|
||||
class TestMappingLocals(unit.BaseTestCase):
|
||||
mapping_split = {
|
||||
'rules': [
|
||||
|
||||
@@ -1045,6 +1045,28 @@ UNICODE_NAME_ASSERTION = {
|
||||
'PFX_orgPersonType': 'Admin;Chief',
|
||||
}
|
||||
|
||||
# Simulates what mod_wsgi does to UTF-8 data per PEP 3333: the raw UTF-8
|
||||
# bytes are decoded as Latin-1, producing mojibake. For example, 'ñ' (UTF-8:
|
||||
# \xc3\xb1) becomes 'Ã±' (Latin-1 interpretation of those two bytes).
|
||||
WSGI_LATIN1_UTF8_ASSERTION = {
    'PFX_Email': 'jon@example.com',
    'PFX_UserName': 'jonkare',
    # The two name values are built by encoding the intended text as UTF-8
    # and reading those bytes back as ISO-8859-1, so 'å' turns into the
    # two characters U+00C3 U+00A5.
    'PFX_FirstName': 'Jon Kåre'.encode('utf-8').decode('ISO-8859-1'),
    'PFX_LastName': 'Hellån'.encode('utf-8').decode('ISO-8859-1'),
    'PFX_orgPersonType': 'Admin;Chief',
}
|
||||
|
||||
# Simulates OIDC groups assertion with non-ASCII characters (e.g. Spanish ñ)
|
||||
# arriving through WSGI with Latin-1 decoding of UTF-8 bytes.
|
||||
WSGI_LATIN1_UTF8_GROUPS_ASSERTION = {
    'OIDC-upn': 'user@example.com',
    # The 'ñ' of 'España' is encoded as UTF-8 and read back as ISO-8859-1,
    # which yields the two characters U+00C3 U+00B1 in its place.
    'OIDC-groups': (
        'Team_Espa'
        + 'ñ'.encode('utf-8').decode('ISO-8859-1')
        + 'a_1401_power_user'
        + ';federation-tests_power_user'
    ),
}
|
||||
|
||||
|
||||
GROUPS_ASSERTION_ONLY_ONE_GROUP = {
|
||||
'userEmail': 'jill@example.com',
|
||||
'UserName': 'jsmith',
|
||||
|
||||
Reference in New Issue
Block a user