feat(Request): add "access_route" and "remote_addr"

Inspired by Werkzeug but improved. Related to #539 and #598.

The "access_route" property supports:

1. derive addrs from "Forwarded" header (defined by RFC7239)
2. derive addrs from "X-Forwarded-For" header
3. derive addrs from "X-Real-IP" header
4. or derive addr from WSGI "REMOTE_ADDR" header

The "remote_addr" property is a shortcut for the WSGI "REMOTE_ADDR" variable.

Thanks to @MackYoel and @kgriffs for all the code review and advice.
This commit is contained in:
Philip_Tzou
2015-09-09 13:02:31 -07:00
parent 0fefca82a3
commit 9430392699
4 changed files with 193 additions and 5 deletions

View File

@@ -27,9 +27,9 @@ except AttributeError: # pragma nocover
import mimeparse
import six
from falcon.errors import *
from falcon.errors import * # NOQA
from falcon import util
from falcon.util.uri import parse_query_string, parse_host
from falcon.util.uri import parse_query_string, parse_host, unquote_string
from falcon import request_helpers as helpers
# NOTE(tbug): In some cases, http_cookies is not a module
@@ -205,6 +205,7 @@ class Request(object):
'_wsgierrors',
'options',
'_cookies',
'_cached_access_route',
)
# Allow child classes to override this
@@ -257,6 +258,7 @@ class Request(object):
self._cached_headers = None
self._cached_uri = None
self._cached_relative_uri = None
self._cached_access_route = None
try:
self.content_type = self.env['CONTENT_TYPE']
@@ -521,6 +523,64 @@ class Request(object):
return self._cookies.copy()
@property
def access_route(self):
    """List of addresses on the path from the client to the last proxy.

    Modeled after werkzeug's ``access_route``. The value is computed on
    first access and stored on the request, so later reads return the
    same list object.

    The first of the following sources that yields a non-empty list wins:

    - the ``Forwarded`` header (parsed by ``_parse_rfc_forwarded``)
    - the ``X-Forwarded-For`` header (comma-separated, whitespace trimmed)
    - the ``X-Real-IP`` header (used verbatim as a single entry)
    - the WSGI ``REMOTE_ADDR`` variable

    If none of them is available the result is an empty list.

    Note:
        Entries are not validated and are not guaranteed to be IPv4 or
        IPv6 addresses. For example, `RFC 7239`_ allows the "unknown"
        identifier and obfuscated identifiers.

        .. _RFC 7239: https://tools.ietf.org/html/rfc7239#section-6

    Warning:
        Forwarding headers can be forged by any client or proxy. Unless
        the application is deployed behind one or more reverse proxies,
        prefer :py:attr:`~.remote_addr`. When it is, trust only the
        **last N** entries that were appended by those proxies, and
        verify them yourself.
    """
    cached = self._cached_access_route
    if cached is not None:
        return cached

    env = self.env
    route = []

    if 'HTTP_FORWARDED' in env:
        route = self._parse_rfc_forwarded()

    if not route and 'HTTP_X_FORWARDED_FOR' in env:
        raw_addrs = env['HTTP_X_FORWARDED_FOR'].split(',')
        route = [raw.strip() for raw in raw_addrs]

    if not route and 'HTTP_X_REAL_IP' in env:
        route = [env['HTTP_X_REAL_IP']]

    if not route and 'REMOTE_ADDR' in env:
        route = [env['REMOTE_ADDR']]

    self._cached_access_route = route
    return route
@property
def remote_addr(self):
    """IP address of the closest client or proxy, as a string.

    The value is read only from the WSGI ``REMOTE_ADDR`` variable, which
    is not taken from any request header and therefore can not be set by
    a client or proxy. ``None`` is returned when the variable is absent
    from the environ.

    Note:
        Behind one or more reverse proxies this is the address of the
        nearest proxy; use :py:obj:`~.access_route` to look for the
        address of the original client.
    """
    environ = self.env
    return environ.get('REMOTE_ADDR')
# ------------------------------------------------------------------------
# Methods
# ------------------------------------------------------------------------
@@ -626,7 +686,8 @@ class Request(object):
``HTTPBadRequest`` instead of returning gracefully when the
header is not found (default ``False``).
obs_date (bool, optional): Support obs-date formats according to
RFC 7231, e.g.: "Sunday, 06-Nov-94 08:49:37 GMT" (default ``False``).
RFC 7231, e.g.: "Sunday, 06-Nov-94 08:49:37 GMT"
(default ``False``).
Returns:
datetime: The value of the specified header if it exists,
@@ -1035,6 +1096,26 @@ class Request(object):
self._params.update(extra_params)
def _parse_rfc_forwarded(self):
    """Collect the "for" addresses from the RFC 7239 "Forwarded" header.

    The header is read from ``self.env['HTTP_FORWARDED']``, so the caller
    must make sure it is present. Elements are separated by "," and the
    parameters inside an element by ";".

    Returns:
        list: One entry per "for" parameter, in header order. Each entry
            is the host part that ``parse_host`` returns for the unquoted
            parameter value.
    """
    hosts = []

    for element in self.env['HTTP_FORWARDED'].split(','):
        for pair in element.split(';'):
            name, separator, value = pair.strip().partition('=')

            if not separator:
                # No "=" at all, so this is not a name=value parameter.
                continue

            if name.lower() == 'for':
                # Only "for" parameters carry an address; "by", "host"
                # and "proto" are ignored. The port, if any, is dropped.
                host, _ = parse_host(unquote_string(value))
                hosts.append(host)

    return hosts
# PERF: To avoid typos and improve storage space and speed over a dict.
class RequestOptions(object):

View File

@@ -107,8 +107,9 @@ def http_date_to_dt(http_date, obs_date=False):
Args:
http_date (str): An RFC 1123 date string, e.g.:
"Tue, 15 Nov 1994 12:45:26 GMT".
obs_date (bool, optional): Support obs-date formats according to
RFC 7231, e.g.: "Sunday, 06-Nov-94 08:49:37 GMT" (default ``False``).
obs_date (bool, optional): Support obs-date formats according to
RFC 7231, e.g.:
"Sunday, 06-Nov-94 08:49:37 GMT" (default ``False``).
Returns:
datetime: A UTC datetime instance corresponding to the given

View File

@@ -376,3 +376,35 @@ def parse_host(host, default_port=None):
# or a domain name plus a port
name, _, port = host.partition(':')
return (name, int(port))
def unquote_string(quoted):
    """Unquote an RFC 7230 "quoted-string".

    Surrounding whitespace is ignored when deciding whether the value is
    wrapped in double quotes. A value that is not wrapped in double quotes
    is returned exactly as it was passed in, whitespace included.

    Args:
        quoted (str): The possibly quoted string.

    Returns:
        str: The text between the quotes with every "quoted-pair"
            (backslash followed by a character) replaced by the character
            itself, or the original value if it was not quoted.
    """
    stripped = quoted.strip()

    is_wrapped = (
        len(stripped) >= 2 and
        stripped[0] == '"' and
        stripped[-1] == '"'
    )
    if not is_wrapped:
        # Hand back the caller's own value rather than the stripped copy.
        return quoted

    inner = stripped[1:-1]

    # PERF: Most header values contain no "quoted-pair" at all, so a quick
    # substring search lets the common case skip any further processing.
    if '\\' not in inner:
        return inner

    # Escaped backslashes ("\\") are split out first so that they survive
    # as a single backslash; every backslash left inside a piece is an
    # escape marker and is simply dropped. When the value contains no
    # escaped backslash, the split yields a single piece.
    pieces = inner.split('\\\\')
    return '\\'.join([piece.replace('\\', '') for piece in pieces])

View File

@@ -0,0 +1,74 @@
from falcon.request import Request
import falcon.testing as testing
class TestAccessRoute(testing.TestBase):
    """Tests for ``Request.access_route`` and ``Request.remote_addr``."""

    def _make_request(self, headers=None):
        """Build a Request for ``/access_route`` on ``example.com``.

        ``headers`` is only forwarded to ``create_environ`` when given, so
        that the header-less case calls it exactly as a bare call would.
        """
        environ_kwargs = {'host': 'example.com', 'path': '/access_route'}
        if headers is not None:
            environ_kwargs['headers'] = headers

        return Request(testing.create_environ(**environ_kwargs))

    def test_remote_addr_only(self):
        # remote_addr must ignore the Forwarded header entirely.
        forwarded = ('for=192.0.2.43, for="[2001:db8:cafe::17]:555",'
                     'for="unknown", by=_hidden,for="\\"\\\\",'
                     'for="198\\.51\\.100\\.17\\:1236";'
                     'proto=https;host=example.com')
        req = self._make_request({'Forwarded': forwarded})

        self.assertEqual(req.remote_addr, '127.0.0.1')

    def test_rfc_forwarded(self):
        forwarded = ('for=192.0.2.43,for=,'
                     'for="[2001:db8:cafe::17]:555",'
                     'for="unknown", by=_hidden,for="\\"\\\\",'
                     'for="_don\\\"t_\\try_this\\\\at_home_\\42",'
                     'for="198\\.51\\.100\\.17\\:1236";'
                     'proto=https;host=example.com')
        req = self._make_request({'Forwarded': forwarded})

        compares = ['192.0.2.43', '', '2001:db8:cafe::17',
                    'unknown', '"\\', '_don"t_try_this\\at_home_42',
                    '198.51.100.17']

        # The second read exercises the cached value.
        for _ in range(2):
            self.assertEqual(req.access_route, compares)

    def test_malformed_rfc_forwarded(self):
        # A Forwarded header without any usable "for" parameter falls
        # through to REMOTE_ADDR.
        req = self._make_request({'Forwarded': 'for'})

        # The second read exercises the cached value.
        for _ in range(2):
            self.assertEqual(req.access_route, ['127.0.0.1'])

    def test_x_forwarded_for(self):
        x_forwarded_for = ('192.0.2.43, 2001:db8:cafe::17,'
                           'unknown, _hidden, 203.0.113.60')
        req = self._make_request({'X-Forwarded-For': x_forwarded_for})

        expected = ['192.0.2.43', '2001:db8:cafe::17',
                    'unknown', '_hidden', '203.0.113.60']
        self.assertEqual(req.access_route, expected)

    def test_x_real_ip(self):
        req = self._make_request({'X-Real-IP': '2001:db8:cafe::17'})

        self.assertEqual(req.access_route, ['2001:db8:cafe::17'])

    def test_remote_addr(self):
        req = self._make_request()

        self.assertEqual(req.access_route, ['127.0.0.1'])