Merge pull request #3 from sigmavirus24/reference-resolution

Reference resolution
This commit is contained in:
Ian Cordasco
2014-07-26 15:17:19 -05:00
6 changed files with 248 additions and 6 deletions

View File

@@ -7,3 +7,9 @@ class InvalidAuthority(RFC3986Exception):
def __init__(self, authority):
    """Build the exception with a message naming the rejected authority.

    :param authority: The authority value to report; it is interpolated
        into the error message as-is.
    """
    message = "The authority ({0}) is not valid.".format(authority)
    super(InvalidAuthority, self).__init__(message)
class ResolutionError(RFC3986Exception):
    """Error whose message reports that a URI is not an absolute URI."""

    def __init__(self, uri):
        """Build the exception message from the offending URI.

        :param uri: Object exposing an ``unsplit()`` method; the value it
            returns is interpolated into the error message.
        """
        message = "{0} is not an absolute URI.".format(uri.unsplit())
        super(ResolutionError, self).__init__(message)

View File

@@ -194,6 +194,17 @@ hier_part = '(//%s%s|%s|%s|%s)' % (
)
# See http://tools.ietf.org/html/rfc3986#section-4.3
ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)$' % (
component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern
# Raw string literal: ``\?`` must reach the regex engine as an escaped,
# literal question mark. In a non-raw literal it is an invalid Python string
# escape, which newer interpreters warn about.
# The query group is optional. ``[1:-1]`` drops the first and last character
# of QUERY_MATCHER's pattern before embedding it (presumably its ``^``/``$``
# anchors -- confirm against the definition of QUERY_MATCHER).
ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
    component_pattern_dict['scheme'], hier_part, QUERY_MATCHER.pattern[1:-1]
))
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative reference's path.

    Implements the "merge" routine described in
    http://tools.ietf.org/html/rfc3986#section-5.2.3.

    :param base_uri: Object exposing ``path`` and ``authority`` attributes
        (each either a string or ``None``).
    :param str relative_path: Path of the reference being resolved.
    :returns: The merged, not yet dot-segment-normalized, path.
    :rtype: str
    """
    # An authority with no path at all: the merged path hangs off the root.
    if base_uri.path is None and base_uri.authority is not None:
        return '/' + relative_path

    path = base_uri.path or ''
    index = path.rfind('/')
    if index == -1 and path:
        # A non-empty base path without any "/" is a single segment, and the
        # RFC says to exclude it entirely. Slicing with ``[:-1]`` here would
        # merely chop its final character (e.g. 'foo' -> 'fo/relative').
        return relative_path

    # When ``path`` is empty, ``index`` is -1 and ``path[:index]`` is still
    # '', so the result is '/' + relative_path (long-standing behaviour,
    # kept for backward compatibility). Otherwise everything after the
    # right-most "/" of the base path is replaced by ``relative_path``.
    return path[:index] + '/' + relative_path

View File

@@ -35,6 +35,9 @@ def normalize_authority(authority):
def normalize_path(path):
    """Normalize a URI path component.

    A falsy ``path`` (``None`` or the empty string) is handed back untouched.
    Anything else is passed through ``normalize_percent_characters`` and then
    ``remove_dot_segments``.

    :param path: The path component to normalize, or ``None``.
    :returns: The normalized path, or ``path`` itself when it is falsy.
    """
    if path:
        normalized = normalize_percent_characters(path)
        return remove_dot_segments(normalized)
    return path

View File

@@ -15,10 +15,11 @@
from collections import namedtuple
from .compat import to_str
from .exceptions import InvalidAuthority
from .exceptions import InvalidAuthority, ResolutionError
from .misc import (
FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER, QUERY_MATCHER,
SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER, URI_COMPONENTS
ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER,
QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER,
URI_COMPONENTS, merge_paths
)
from .normalizers import (
encode_component, normalize_scheme, normalize_authority, normalize_path,
@@ -139,6 +140,18 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
return None
return authority['userinfo']
def is_absolute(self):
    """Report whether this URI Reference is an absolute URI.

    See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.

    :returns: ``True`` when the unsplit reference matches
        ``ABSOLUTE_URI_MATCHER``, ``False`` otherwise.
    :rtype: bool
    """
    return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit()))
def is_valid(self, **kwargs):
"""Determines if the URI is valid.
@@ -266,6 +279,70 @@ class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
"""
return tuple(self.normalize()) == tuple(other_ref.normalize())
def resolve(self, base_uri, strict=False):
    """Resolve this reference against an absolute base URI.

    See http://tools.ietf.org/html/rfc3986#section-5 for more information.

    :param base_uri: Either a string or URIReference. It must be an
        absolute URI or an exception is raised.
    :param bool strict: When false (the default), a reference whose scheme
        equals the normalized base URI's scheme is treated as though it
        had no scheme at all. When true, the scheme is left in place.
    :returns: A new URIReference which is the result of resolving this
        reference using ``base_uri``.
    :rtype: :class:`URIReference`
    :raises ResolutionError: If the ``base_uri`` is not an absolute URI.
    """
    if not isinstance(base_uri, URIReference):
        base_uri = URIReference.from_string(base_uri)

    if not base_uri.is_absolute():
        raise ResolutionError(base_uri)

    # Normalizing the base is optional per
    # http://tools.ietf.org/html/rfc3986#section-5.2.1
    base_uri = base_uri.normalize()

    reference = self
    if not strict and reference.scheme == base_uri.scheme:
        reference = reference._replace(scheme=None)

    # The cases below follow http://tools.ietf.org/html/rfc3986#page-32

    # The reference carries its own scheme: only its path is normalized.
    if reference.scheme is not None:
        return reference._replace(path=normalize_path(reference.path))

    # The reference carries its own authority: borrow only the base scheme.
    if reference.authority is not None:
        return reference._replace(
            scheme=base_uri.scheme,
            path=normalize_path(reference.path)
        )

    # No path on the reference: reuse the base path, and the base query
    # too unless the reference supplies one.
    if reference.path is None:
        if reference.query is None:
            query = base_uri.query
        else:
            query = reference.query
        return reference._replace(
            scheme=base_uri.scheme,
            authority=base_uri.authority,
            path=base_uri.path,
            query=query
        )

    # A path starting with "/" is used as-is; any other path is merged
    # with the base path first. Either way it is then normalized.
    if reference.path.startswith('/'):
        unnormalized = reference.path
    else:
        unnormalized = merge_paths(base_uri, reference.path)
    return reference._replace(
        scheme=base_uri.scheme,
        authority=base_uri.authority,
        path=normalize_path(unnormalized),
        query=reference.query
    )
def unsplit(self):
"""Create a URI string from the components.

47
tests/test_misc.py Normal file
View File

@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
from rfc3986.uri import URIReference
from rfc3986.misc import merge_paths
def test_merge_paths_with_base_path_without_base_authority():
    """Merge against a base that has a path but no authority."""
    base_uri = URIReference(scheme=None,
                            authority=None,
                            path='/foo/bar/bogus',
                            query=None,
                            fragment=None)
    merged = merge_paths(base_uri, 'relative')
    assert merged == '/foo/bar/relative'
def test_merge_paths_with_base_authority_and_path():
    """Merge against a base that has both an authority and a path."""
    base_uri = URIReference(scheme=None,
                            authority='authority',
                            path='/foo/bar/bogus',
                            query=None,
                            fragment=None)
    merged = merge_paths(base_uri, 'relative')
    assert merged == '/foo/bar/relative'
def test_merge_paths_without_base_authority_or_path():
    """Merge against a base that has neither an authority nor a path."""
    base_uri = URIReference(scheme=None,
                            authority=None,
                            path=None,
                            query=None,
                            fragment=None)
    merged = merge_paths(base_uri, 'relative')
    assert merged == '/relative'
def test_merge_paths_with_base_authority_without_path():
    """Merge against a base that has an authority but no path."""
    base_uri = URIReference(scheme=None,
                            authority='authority',
                            path=None,
                            query=None,
                            fragment=None)
    merged = merge_paths(base_uri, 'relative')
    assert merged == '/relative'

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import pytest
from rfc3986.exceptions import InvalidAuthority
from rfc3986.exceptions import InvalidAuthority, ResolutionError
from rfc3986.misc import URI_MATCHER
from rfc3986.uri import URIReference
@@ -407,3 +407,101 @@ class TestURIReferenceComparesToURIReferences:
def test_different_basic_uris(self, basic_uri, basic_uri_with_port):
    """References built from the two fixtures must not compare equal."""
    without_port = URIReference.from_string(basic_uri)
    with_port = URIReference.from_string(basic_uri_with_port)
    assert (without_port == with_port) is False
class TestURIReferenceIsAbsolute:
    """Checks of ``URIReference.is_absolute`` against the URI fixtures."""

    def test_basic_uris_are_absolute(self, basic_uri):
        """The ``basic_uri`` fixture parses to an absolute URI."""
        assert URIReference.from_string(basic_uri).is_absolute() is True

    def test_basic_uris_with_ports_are_absolute(self, basic_uri_with_port):
        """The ``basic_uri_with_port`` fixture parses to an absolute URI."""
        reference = URIReference.from_string(basic_uri_with_port)
        assert reference.is_absolute() is True

    def test_basic_uris_with_paths_are_absolute(self, basic_uri_with_path):
        """The ``basic_uri_with_path`` fixture parses to an absolute URI."""
        reference = URIReference.from_string(basic_uri_with_path)
        assert reference.is_absolute() is True

    def test_uri_with_everything_are_not_absolute(self, uri_with_everything):
        """The ``uri_with_everything`` fixture is not an absolute URI."""
        reference = URIReference.from_string(uri_with_everything)
        assert reference.is_absolute() is False

    def test_absolute_paths_are_not_absolute_uris(self, absolute_path_uri):
        """The ``absolute_path_uri`` fixture is not an absolute URI."""
        reference = URIReference.from_string(absolute_path_uri)
        assert reference.is_absolute() is False
# @pytest.fixture(params=[
# basic_uri, basic_uri_with_port, basic_uri_with_path,
# scheme_and_path_uri, uri_with_path_and_query
# ])
# @pytest.fixture(params=[absolute_path_uri, relative_uri])
class TestURIReferencesResolve:
    """Checks of ``URIReference.resolve`` against various base URIs."""

    def test_with_basic_and_relative_uris(self, basic_uri, relative_uri):
        """Resolve the ``relative_uri`` fixture against a string base."""
        reference = URIReference.from_string(relative_uri)
        base = URIReference.from_string(basic_uri)
        # The base is deliberately passed as a plain string here.
        target = reference.resolve(basic_uri)
        assert target.scheme == base.scheme
        assert target.host == reference.host
        assert target.path == reference.path

    def test_with_basic_and_absolute_path_uris(self, basic_uri,
                                               absolute_path_uri):
        """Resolve the ``absolute_path_uri`` fixture against a basic URI."""
        reference = URIReference.from_string(absolute_path_uri)
        base = URIReference.from_string(basic_uri).normalize()
        target = reference.resolve(base)
        assert target.scheme == base.scheme
        assert target.host == base.host
        assert target.path == reference.path

    def test_with_basic_uri_and_relative_path(self, basic_uri):
        """A relative path resolved against ``basic_uri`` gains a '/'."""
        reference = URIReference.from_string('foo/bar/bogus')
        base = URIReference.from_string(basic_uri).normalize()
        target = reference.resolve(base)
        assert target.scheme == base.scheme
        assert target.host == base.host
        assert target.path == '/' + reference.path

    def test_basic_uri_with_path_and_relative_path(self, basic_uri_with_path):
        """A relative path replaces what follows the base path's last '/'."""
        reference = URIReference.from_string('foo/bar/bogus')
        base = URIReference.from_string(basic_uri_with_path).normalize()
        target = reference.resolve(base)
        assert target.scheme == base.scheme
        assert target.host == base.host
        base_prefix = base.path[:base.path.rfind('/')]
        assert target.path == base_prefix + '/' + reference.path

    def test_uri_with_everything_raises_exception(self, uri_with_everything):
        """Resolving against ``uri_with_everything`` raises ResolutionError."""
        reference = URIReference.from_string('foo/bar/bogus')
        base = URIReference.from_string(uri_with_everything)
        with pytest.raises(ResolutionError):
            reference.resolve(base)

    def test_basic_uri_resolves_itself(self, basic_uri):
        """Resolving a basic URI against itself yields the base."""
        reference = URIReference.from_string(basic_uri)
        base = URIReference.from_string(basic_uri)
        target = reference.resolve(base)
        assert target == base

    def test_differing_schemes(self, basic_uri):
        """The target of an https reference keeps the reference's scheme."""
        reference = URIReference.from_string('https://example.com/path')
        base = URIReference.from_string(basic_uri)
        target = reference.resolve(base)
        assert target.scheme == reference.scheme

    def test_resolve_pathless_fragment(self, basic_uri):
        """A fragment-only reference keeps its fragment; the path is None."""
        reference = URIReference.from_string('#fragment')
        base = URIReference.from_string(basic_uri)
        target = reference.resolve(base)
        assert target.path is None
        assert target.fragment == 'fragment'

    def test_resolve_pathless_query(self, basic_uri):
        """A query-only reference keeps its query; the path is None."""
        reference = URIReference.from_string('?query')
        base = URIReference.from_string(basic_uri)
        target = reference.resolve(base)
        assert target.path is None
        assert target.query == 'query'