diff --git a/rfc3986/exceptions.py b/rfc3986/exceptions.py
index 19ad191..fe072e1 100644
--- a/rfc3986/exceptions.py
+++ b/rfc3986/exceptions.py
@@ -7,3 +7,11 @@ class InvalidAuthority(RFC3986Exception):
     def __init__(self, authority):
         super(InvalidAuthority, self).__init__(
             "The authority ({0}) is not valid.".format(authority))
+
+
+class ResolutionError(RFC3986Exception):
+    """Raised when a base URI used for resolution is not absolute."""
+
+    def __init__(self, uri):
+        super(ResolutionError, self).__init__(
+            "{0} is not an absolute URI.".format(uri.unsplit()))
diff --git a/rfc3986/misc.py b/rfc3986/misc.py
index b89269b..bbb3a54 100644
--- a/rfc3986/misc.py
+++ b/rfc3986/misc.py
@@ -194,6 +194,30 @@ hier_part = '(//%s%s|%s|%s|%s)' % (
 )
 
 # See http://tools.ietf.org/html/rfc3986#section-4.3
-ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)$' % (
-    component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern
+# The [1:-1] slice drops the first and last character of the query pattern
+# (presumably its "^"/"$" anchors) so it can be embedded as an optional group.
+ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
+    component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern[1:-1]
 ))
+
+
+# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
+def merge_paths(base_uri, relative_path):
+    """Merge a base URI's path with a relative URI's path.
+
+    :param base_uri: Object whose ``path`` and ``authority`` are consulted.
+    :param str relative_path: Path of the reference being resolved.
+    :returns: The merged path; dot segments are not removed here.
+    :rtype: str
+    """
+    if base_uri.path is None and base_uri.authority is not None:
+        return '/' + relative_path
+
+    path = base_uri.path or ''
+    index = path.rfind('/')
+    if path and index == -1:
+        # A non-empty base path without "/" is dropped entirely; slicing
+        # with -1 would instead chop only its last character.
+        return relative_path
+    # An empty base path keeps the leading "/" (''[:-1] is '').
+    return path[:index] + '/' + relative_path
diff --git a/rfc3986/normalizers.py b/rfc3986/normalizers.py
index d232093..bb0630c 100644
--- a/rfc3986/normalizers.py
+++ b/rfc3986/normalizers.py
@@ -35,6 +35,10 @@ def normalize_authority(authority):
 
 
 def normalize_path(path):
+    if not path:
+        # None and '' have nothing to normalize; return them unchanged.
+        return path
+
     path = normalize_percent_characters(path)
     return remove_dot_segments(path)
 
diff --git a/rfc3986/uri.py b/rfc3986/uri.py
index ab9d982..f06cc47 100644
--- a/rfc3986/uri.py
+++ b/rfc3986/uri.py
@@ -15,10 +15,11 @@
 from collections import namedtuple
 
 from .compat import to_str
-from .exceptions import InvalidAuthority
+from .exceptions import InvalidAuthority, ResolutionError
 from .misc import (
-    FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER, QUERY_MATCHER,
-    SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER, URI_COMPONENTS
+    ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER,
+    QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER,
+    URI_COMPONENTS, merge_paths
 )
 from .normalizers import (
     encode_component, normalize_scheme, normalize_authority, normalize_path,
@@ -139,6 +140,16 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
             return None
         return authority['userinfo']
 
+    def is_absolute(self):
+        """Determine if this URI Reference is an absolute URI.
+
+        See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.
+
+        :returns: ``True`` if it is an absolute URI, ``False`` otherwise.
+        :rtype: bool
+        """
+        return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit()))
+
     def is_valid(self, **kwargs):
         """Determines if the URI is valid.
 
@@ -266,6 +277,73 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
         """
         return tuple(self.normalize()) == tuple(other_ref.normalize())
 
+    def resolve(self, base_uri, strict=False):
+        """Use an absolute URI Reference to resolve this relative reference.
+
+        See http://tools.ietf.org/html/rfc3986#section-5 for more information.
+
+        :param base_uri: Either a string or URIReference. It must be an
+            absolute URI or it will raise an exception.
+        :param bool strict: When ``False`` (the default), a reference whose
+            scheme equals the normalized base URI's scheme is resolved as
+            if it had no scheme.
+        :returns: A new URIReference which is the result of resolving this
+            reference using ``base_uri``.
+        :rtype: :class:`URIReference`
+        :raises ResolutionError: If the ``base_uri`` is not an absolute URI.
+        """
+        if not isinstance(base_uri, URIReference):
+            base_uri = URIReference.from_string(base_uri)
+
+        if not base_uri.is_absolute():
+            raise ResolutionError(base_uri)
+
+        # This is optional per
+        # http://tools.ietf.org/html/rfc3986#section-5.2.1
+        base_uri = base_uri.normalize()
+
+        # The reference we're resolving
+        resolving = self
+
+        if not strict and resolving.scheme == base_uri.scheme:
+            resolving = resolving._replace(scheme=None)
+
+        # http://tools.ietf.org/html/rfc3986#page-32
+        if resolving.scheme is not None:
+            # The reference carries its own scheme; only its path is cleaned.
+            return resolving._replace(path=normalize_path(resolving.path))
+
+        if resolving.authority is not None:
+            # The reference has its own authority; only the scheme is
+            # taken from the base.
+            return resolving._replace(
+                scheme=base_uri.scheme,
+                path=normalize_path(resolving.path)
+            )
+
+        if not resolving.path:
+            # Missing or empty path (RFC 3986 5.2.2 tests R.path == ""):
+            # reuse the base path, and the base query unless we have one.
+            query = resolving.query
+            if query is None:
+                query = base_uri.query
+            return resolving._replace(
+                scheme=base_uri.scheme,
+                authority=base_uri.authority,
+                path=base_uri.path,
+                query=query
+            )
+
+        if resolving.path.startswith('/'):
+            path = normalize_path(resolving.path)
+        else:
+            path = normalize_path(merge_paths(base_uri, resolving.path))
+        return resolving._replace(
+            scheme=base_uri.scheme,
+            authority=base_uri.authority,
+            path=path,
+            query=resolving.query
+        )
+
     def unsplit(self):
         """Create a URI string from the components.
 
diff --git a/tests/test_misc.py b/tests/test_misc.py
new file mode 100644
index 0000000..eced326
--- /dev/null
+++ b/tests/test_misc.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+from rfc3986.uri import URIReference
+from rfc3986.misc import merge_paths
+
+
+def test_merge_paths_with_base_path_without_base_authority():
+    """Demonstrate merging with a base URI without an authority."""
+    base = URIReference(scheme=None,
+                        authority=None,
+                        path='/foo/bar/bogus',
+                        query=None,
+                        fragment=None)
+    expected = '/foo/bar/relative'
+    assert merge_paths(base, 'relative') == expected
+
+
+def test_merge_paths_with_base_authority_and_path():
+    """Demonstrate merging with a base URI with an authority and path."""
+    base = URIReference(scheme=None,
+                        authority='authority',
+                        path='/foo/bar/bogus',
+                        query=None,
+                        fragment=None)
+    expected = '/foo/bar/relative'
+    assert merge_paths(base, 'relative') == expected
+
+
+def test_merge_paths_without_base_authority_or_path():
+    """Demonstrate merging with a base URI without an authority or path."""
+    base = URIReference(scheme=None,
+                        authority=None,
+                        path=None,
+                        query=None,
+                        fragment=None)
+    expected = '/relative'
+    assert merge_paths(base, 'relative') == expected
+
+
+def test_merge_paths_with_base_authority_without_path():
+    """Demonstrate merging with a base URI with an authority but no path."""
+    base = URIReference(scheme=None,
+                        authority='authority',
+                        path=None,
+                        query=None,
+                        fragment=None)
+    expected = '/relative'
+    assert merge_paths(base, 'relative') == expected
diff --git a/tests/test_uri.py b/tests/test_uri.py
index 35287d0..ba83a47 100644
--- a/tests/test_uri.py
+++ b/tests/test_uri.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 import pytest
 
-from rfc3986.exceptions import InvalidAuthority
+from rfc3986.exceptions import InvalidAuthority, ResolutionError
 from rfc3986.misc import URI_MATCHER
 from rfc3986.uri import URIReference
 
@@ -407,3 +407,94 @@ class TestURIReferenceComparesToURIReferences:
     def test_different_basic_uris(self, basic_uri, basic_uri_with_port):
         uri = URIReference.from_string(basic_uri)
         assert (uri == URIReference.from_string(basic_uri_with_port)) is False
+
+
+class TestURIReferenceIsAbsolute:
+    def test_basic_uris_are_absolute(self, basic_uri):
+        uri = URIReference.from_string(basic_uri)
+        assert uri.is_absolute() is True
+
+    def test_basic_uris_with_ports_are_absolute(self, basic_uri_with_port):
+        uri = URIReference.from_string(basic_uri_with_port)
+        assert uri.is_absolute() is True
+
+    def test_basic_uris_with_paths_are_absolute(self, basic_uri_with_path):
+        uri = URIReference.from_string(basic_uri_with_path)
+        assert uri.is_absolute() is True
+
+    def test_uri_with_everything_are_not_absolute(self, uri_with_everything):
+        uri = URIReference.from_string(uri_with_everything)
+        assert uri.is_absolute() is False
+
+    def test_absolute_paths_are_not_absolute_uris(self, absolute_path_uri):
+        uri = URIReference.from_string(absolute_path_uri)
+        assert uri.is_absolute() is False
+
+
+class TestURIReferencesResolve:
+    def test_with_basic_and_relative_uris(self, basic_uri, relative_uri):
+        R = URIReference.from_string(relative_uri)
+        B = URIReference.from_string(basic_uri)
+        T = R.resolve(basic_uri)
+        assert T.scheme == B.scheme
+        assert T.host == R.host
+        assert T.path == R.path
+
+    def test_with_basic_and_absolute_path_uris(self, basic_uri,
+                                               absolute_path_uri):
+        R = URIReference.from_string(absolute_path_uri)
+        B = URIReference.from_string(basic_uri).normalize()
+        T = R.resolve(B)
+        assert T.scheme == B.scheme
+        assert T.host == B.host
+        assert T.path == R.path
+
+    def test_with_basic_uri_and_relative_path(self, basic_uri):
+        R = URIReference.from_string('foo/bar/bogus')
+        B = URIReference.from_string(basic_uri).normalize()
+        T = R.resolve(B)
+        assert T.scheme == B.scheme
+        assert T.host == B.host
+        assert T.path == '/' + R.path
+
+    def test_basic_uri_with_path_and_relative_path(self, basic_uri_with_path):
+        R = URIReference.from_string('foo/bar/bogus')
+        B = URIReference.from_string(basic_uri_with_path).normalize()
+        T = R.resolve(B)
+        assert T.scheme == B.scheme
+        assert T.host == B.host
+
+        index = B.path.rfind('/')
+        assert T.path == B.path[:index] + '/' + R.path
+
+    def test_uri_with_everything_raises_exception(self, uri_with_everything):
+        R = URIReference.from_string('foo/bar/bogus')
+        B = URIReference.from_string(uri_with_everything)
+        with pytest.raises(ResolutionError):
+            R.resolve(B)
+
+    def test_basic_uri_resolves_itself(self, basic_uri):
+        R = URIReference.from_string(basic_uri)
+        B = URIReference.from_string(basic_uri)
+        T = R.resolve(B)
+        assert T == B
+
+    def test_differing_schemes(self, basic_uri):
+        R = URIReference.from_string('https://example.com/path')
+        B = URIReference.from_string(basic_uri)
+        T = R.resolve(B)
+        assert T.scheme == R.scheme
+
+    def test_resolve_pathless_fragment(self, basic_uri):
+        R = URIReference.from_string('#fragment')
+        B = URIReference.from_string(basic_uri)
+        T = R.resolve(B)
+        assert T.path is None
+        assert T.fragment == 'fragment'
+
+    def test_resolve_pathless_query(self, basic_uri):
+        R = URIReference.from_string('?query')
+        B = URIReference.from_string(basic_uri)
+        T = R.resolve(B)
+        assert T.path is None
+        assert T.query == 'query'