Add urlparse function, rename urlparse module
- Start testing by checking that the urlparse function does not error on the unicode snowman domain.
- Add a strict keyword argument to ParseResult.from_string to turn off strict authority validation.
- Add an InvalidPort exception to be very specific when converting a port string to an int fails.
- Add stdlib compatibility shims.
This commit is contained in:
@@ -29,8 +29,17 @@ __author__ = 'Ian Cordasco'
|
||||
__author_email__ = 'ian.cordasco@rackspace.com'
|
||||
__license__ = 'Apache v2.0'
|
||||
__copyright__ = 'Copyright 2014 Rackspace'
|
||||
__version__ = '0.2.2'
|
||||
__version__ = '0.3.0.dev1'
|
||||
|
||||
from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri)
|
||||
from .api import (URIReference, uri_reference, is_valid_uri, normalize_uri,
|
||||
urlparse)
|
||||
from .parseresult import ParseResult
|
||||
|
||||
__all__ = ['URIReference', 'uri_reference', 'is_valid_uri', 'normalize_uri']
|
||||
__all__ = (
|
||||
'ParseResult',
|
||||
'URIReference',
|
||||
'is_valid_uri',
|
||||
'normalize_uri',
|
||||
'uri_reference',
|
||||
'urlparse',
|
||||
)
|
||||
|
||||
@@ -21,6 +21,7 @@ provides access to the class ``URIReference``.
|
||||
"""
|
||||
|
||||
from .uri import URIReference
|
||||
from .parseresult import ParseResult
|
||||
|
||||
|
||||
def uri_reference(uri, encoding='utf-8'):
|
||||
@@ -76,3 +77,16 @@ def normalize_uri(uri, encoding='utf-8'):
|
||||
"""
|
||||
normalized_reference = URIReference.from_string(uri, encoding).normalize()
|
||||
return normalized_reference.unsplit()
|
||||
|
||||
|
||||
def urlparse(uri, encoding='utf-8'):
    """Parse ``uri`` into a :class:`~rfc3986.parseresult.ParseResult`.

    Intended as a partial stand-in for the standard library's ``urlparse``
    function. Parsing is delegated to ``ParseResult.from_string`` with
    ``strict=False``.

    :param str uri: The URI to be parsed.
    :param str encoding: The encoding of the string provided.
    :returns: A parsed URI
    :rtype: :class:`~rfc3986.parseresult.ParseResult`
    """
    parse_result = ParseResult.from_string(uri, encoding, strict=False)
    return parse_result
|
||||
|
||||
@@ -9,6 +9,12 @@ class InvalidAuthority(RFC3986Exception):
|
||||
"The authority ({0}) is not valid.".format(authority))
|
||||
|
||||
|
||||
class InvalidPort(RFC3986Exception):
    """Exception carrying a message that the given port is not valid."""

    def __init__(self, port):
        # Build the message first so the super() call stays on one line.
        message = 'The port ("{0}") is not valid.'.format(port)
        super(InvalidPort, self).__init__(message)
|
||||
|
||||
|
||||
class ResolutionError(RFC3986Exception):
|
||||
def __init__(self, uri):
|
||||
super(ResolutionError, self).__init__(
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
from collections import namedtuple
|
||||
|
||||
from . import exceptions
|
||||
from . import normalizers
|
||||
from . import uri
|
||||
|
||||
@@ -40,19 +41,35 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS)):
|
||||
return parse_result
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, uri_string, encoding='utf-8'):
|
||||
def from_string(cls, uri_string, encoding='utf-8', strict=True):
|
||||
"""Parse a URI from the given unicode URI string.
|
||||
|
||||
:param str uri_string: Unicode URI to be parsed into a reference.
|
||||
:param str encoding: The encoding of the string provided
|
||||
:param bool strict: Parse strictly according to :rfc:`3986` if True.
|
||||
If False, parse similarly to the standard library's urlparse
|
||||
function.
|
||||
:returns: :class:`ParseResult` or subclass thereof
|
||||
"""
|
||||
reference = uri.URIReference.from_string(uri_string, encoding)
|
||||
subauthority = reference.authority_info()
|
||||
# Thanks to Richard Barrell for this idea:
|
||||
# https://twitter.com/0x2ba22e11/status/617338811975139328
|
||||
userinfo, host, port = (subauthority.get(p)
|
||||
for p in ('userinfo', 'host', 'port'))
|
||||
try:
|
||||
subauthority = reference.authority_info()
|
||||
except exceptions.InvalidAuthority:
|
||||
if strict:
|
||||
raise
|
||||
userinfo, host, port = split_authority(reference.authority)
|
||||
else:
|
||||
# Thanks to Richard Barrell for this idea:
|
||||
# https://twitter.com/0x2ba22e11/status/617338811975139328
|
||||
userinfo, host, port = (subauthority.get(p)
|
||||
for p in ('userinfo', 'host', 'port'))
|
||||
|
||||
if port:
|
||||
try:
|
||||
port = int(port)
|
||||
except ValueError:
|
||||
raise exceptions.InvalidPort(port)
|
||||
|
||||
return cls(scheme=reference.scheme,
|
||||
userinfo=userinfo,
|
||||
host=host,
|
||||
@@ -101,6 +118,25 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS)):
|
||||
fragment=attrs_dict.get('fragment'))
|
||||
return ParseResult(uri_ref=ref, **attrs_dict)
|
||||
|
||||
def geturl(self):
    """Return the URI as a string (standard library ``geturl`` shim).

    This simply delegates to :meth:`unsplit`.
    """
    url = self.unsplit()
    return url
|
||||
|
||||
@property
def hostname(self):
    """Alias of ``host`` under the standard library's attribute name."""
    host = self.host
    return host
|
||||
|
||||
@property
def netloc(self):
    """Alias of ``authority`` under the standard library's attribute name."""
    authority = self.authority
    return authority
|
||||
|
||||
@property
def params(self):
    """Return the query portion of the URI (standard library shim).

    NOTE(review): in the standard library's ``urlparse`` result, ``params``
    holds the ``;parameters`` of the last path segment and is distinct from
    ``query``. Here it is an alias of ``query`` -- confirm this is intended.
    """
    query = self.query
    return query
|
||||
|
||||
def unsplit(self):
|
||||
"""Create a URI string from the components.
|
||||
|
||||
@@ -108,3 +144,27 @@ class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS)):
|
||||
:rtype: str
|
||||
"""
|
||||
return self.reference.unsplit()
|
||||
|
||||
|
||||
def split_authority(authority):
    """Leniently split an authority string into its three components.

    No validation is performed on any component. The pieces are located
    purely by their delimiters: the last ``@`` ends the userinfo, a leading
    ``[`` ... ``]`` pair wraps an IP literal, and the first remaining ``:``
    starts the port.

    :param str authority: The authority portion of a URI, e.g.
        ``user@example.com:8080``. May be ``None`` or empty.
    :returns: A ``(userinfo, host, port)`` tuple. Components that are absent
        are ``None``. The port is returned as a string exactly as written
        (possibly empty when the authority ends in ``:``). A bracketed host
        keeps its surrounding brackets.
    :rtype: tuple
    :raises ValueError: If the host starts with ``[`` but has no closing
        ``]``.
    """
    userinfo = host = port = None

    # Nothing to split: avoid a TypeError from ``in`` on None.
    if not authority:
        return userinfo, host, port

    remainder = authority

    # Split on the *last* '@' so that an '@' inside userinfo stays there.
    if u'@' in remainder:
        userinfo, remainder = remainder.rsplit(u'@', 1)

    # Bracketed IP literal (e.g. IPv6). The colons inside the brackets must
    # not be mistaken for the port delimiter, so peel the literal off first
    # and restore the closing bracket that split() consumed.
    if remainder.startswith(u'['):
        host, remainder = remainder.split(u']', 1)
        host += u']'

    if u':' in remainder:
        leading, port = remainder.split(u':', 1)
        # Only use the text before ':' as the host when no bracketed host
        # was found above.
        if leading and not host:
            host = leading
    elif remainder and not host:
        # No port delimiter at all: everything left over is the host.
        host = remainder

    return userinfo, host, port
|
||||
@@ -1,10 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from rfc3986 import uri_reference
|
||||
from rfc3986 import urlparse
|
||||
|
||||
|
||||
SNOWMAN = b'\xe2\x98\x83'
|
||||
|
||||
|
||||
def test_unicode_uri():
|
||||
url_bytestring = b'http://example.com?utf8=\xe2\x98\x83'
|
||||
url_bytestring = b'http://example.com?utf8=' + SNOWMAN
|
||||
unicode_url = url_bytestring.decode('utf-8')
|
||||
uri = uri_reference(unicode_url)
|
||||
assert uri.is_valid() is True
|
||||
@@ -12,15 +16,21 @@ def test_unicode_uri():
|
||||
|
||||
|
||||
def test_unicode_uri_passed_as_bytes():
|
||||
url_bytestring = b'http://example.com?utf8=\xe2\x98\x83'
|
||||
url_bytestring = b'http://example.com?utf8=' + SNOWMAN
|
||||
uri = uri_reference(url_bytestring)
|
||||
assert uri.is_valid() is True
|
||||
assert uri == 'http://example.com?utf8=%E2%98%83'
|
||||
|
||||
|
||||
def test_unicode_authority():
|
||||
url_bytestring = b'http://\xe2\x98\x83.com'
|
||||
url_bytestring = b'http://' + SNOWMAN + b'.com'
|
||||
unicode_url = url_bytestring.decode('utf-8')
|
||||
uri = uri_reference(unicode_url)
|
||||
assert uri.is_valid() is False
|
||||
assert uri == unicode_url
|
||||
|
||||
|
||||
def test_unicode_hostname():
    """urlparse should not error on a URL whose host is the snowman."""
    snowman_url = b'http://' + SNOWMAN + b'.com'
    result = urlparse(snowman_url)
    assert result
|
||||
|
||||
Reference in New Issue
Block a user