perf(Request): Optimize instantiation

This patch reduces the time it takes to instantiate a Request object
by a few microseconds by implementing lazy header parsing from the
WSGI environ.

This should not impact reading headers significantly, so this change
should be a net win.

Kudos to the OpenStack Swift team. This idea came from my research into
their swob module.

Closes #213
This commit is contained in:
kgriffs
2014-01-03 15:41:31 -06:00
parent 25759c73e3
commit 34f76c002c
3 changed files with 57 additions and 41 deletions

View File

@@ -70,7 +70,7 @@ class Request(object):
__slots__ = (
'env',
'_headers',
'_cached_headers',
'method',
'_params',
'path',
@@ -118,7 +118,8 @@ class Request(object):
else:
self._params = {}
self._headers = helpers.parse_headers(env)
helpers.normalize_headers(env)
self._cached_headers = {}
# NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
# normalizing semantics between, e.g., gunicorn and wsgiref.
@@ -221,7 +222,7 @@ class Request(object):
@property
def accept(self):
"""Value of the Accept header, or */* if not found per RFC."""
accept = self._get_header_by_wsgi_name('ACCEPT')
accept = self._get_header_by_wsgi_name('HTTP_ACCEPT')
# NOTE(kgriffs): Per RFC, missing accept header is
# equivalent to '*/*'
@@ -235,7 +236,7 @@ class Request(object):
@property
def auth(self):
"""Value of the Authorization header, or None if not found."""
return self._get_header_by_wsgi_name('AUTHORIZATION')
return self._get_header_by_wsgi_name('HTTP_AUTHORIZATION')
@property
def content_length(self):
@@ -248,7 +249,7 @@ class Request(object):
HTTPBadRequest: The header had a value, but it wasn't
formatted correctly or was a negative number.
"""
value = self._get_header_by_wsgi_name('CONTENT_LENGTH')
value = self._get_header_by_wsgi_name('HTTP_CONTENT_LENGTH')
if value:
try:
value_as_int = int(value)
@@ -269,7 +270,7 @@ class Request(object):
@property
def content_type(self):
"""Value of the Content-Type header, or None if not found."""
return self._get_header_by_wsgi_name('CONTENT_TYPE')
return self._get_header_by_wsgi_name('HTTP_CONTENT_TYPE')
@property
def date(self):
@@ -286,7 +287,7 @@ class Request(object):
"""
http_date = self._get_header_by_wsgi_name('DATE')
http_date = self._get_header_by_wsgi_name('HTTP_DATE')
try:
return util.http_date_to_dt(http_date)
except ValueError:
@@ -297,32 +298,32 @@ class Request(object):
@property
def expect(self):
"""Value of the Expect header, or None if missing."""
return self._get_header_by_wsgi_name('EXPECT')
return self._get_header_by_wsgi_name('HTTP_EXPECT')
@property
def if_match(self):
"""Value of the If-Match header, or None if missing."""
return self._get_header_by_wsgi_name('IF_MATCH')
return self._get_header_by_wsgi_name('HTTP_IF_MATCH')
@property
def if_none_match(self):
"""Value of the If-None-Match header, or None if missing."""
return self._get_header_by_wsgi_name('IF_NONE_MATCH')
return self._get_header_by_wsgi_name('HTTP_IF_NONE_MATCH')
@property
def if_modified_since(self):
"""Value of the If-Modified-Since header, or None if missing."""
return self._get_header_by_wsgi_name('IF_MODIFIED_SINCE')
return self._get_header_by_wsgi_name('HTTP_IF_MODIFIED_SINCE')
@property
def if_unmodified_since(self):
"""Value of the If-Unmodified-Since header, or None if missing."""
return self._get_header_by_wsgi_name('IF_UNMODIFIED_SINCE')
return self._get_header_by_wsgi_name('HTTP_IF_UNMODIFIED_SINCE')
@property
def if_range(self):
"""Value of the If-Range header, or None if missing."""
return self._get_header_by_wsgi_name('IF_RANGE')
return self._get_header_by_wsgi_name('HTTP_IF_RANGE')
@property
def protocol(self):
@@ -349,7 +350,7 @@ class Request(object):
formatted correctly.
"""
value = self._get_header_by_wsgi_name('RANGE')
value = self._get_header_by_wsgi_name('HTTP_RANGE')
if value:
if ',' in value:
@@ -406,11 +407,11 @@ class Request(object):
@property
def user_agent(self):
"""Value of the User-Agent string, or None if missing."""
return self._get_header_by_wsgi_name('USER_AGENT')
return self._get_header_by_wsgi_name('HTTP_USER_AGENT')
@property
def headers(self):
"""Get HTTP headers
"""Get raw HTTP headers
Build a temporary dictionary of dash-separated HTTP headers,
which can be used as a whole, e.g., to perform an HTTP request.
@@ -418,11 +419,24 @@ class Request(object):
If you want to lookup a header, please use `get_header` instead.
Returns:
A dictionary of HTTP headers.
A new dictionary of HTTP headers.
"""
return dict([(k.replace('_', '-'), v)
for k, v in self._headers.items()])
# NOTE(kgriffs): The first access populates the cached dict, so
# all we have to do on later calls is clone it.
if not self._cached_headers:
headers = self._cached_headers
env = self.env
for name, value in env.items():
if name.startswith('HTTP_'):
# NOTE(kgriffs): Don't take the time to fix the case
# since headers are supposed to be case-insensitive
# anyway.
headers[name[5:].replace('_', '-')] = value
return self._cached_headers.copy()
def get_header(self, name, required=False):
"""Return a header value as a string
@@ -448,7 +462,7 @@ class Request(object):
# Don't take the time to cache beforehand, using HTTP naming.
# This will be faster, assuming that most headers are looked
# up only once, and not all headers will be requested.
return self._headers[name.upper().replace('-', '_')]
return self.env['HTTP_' + name.upper().replace('-', '_')]
except KeyError:
if not required:
return None
@@ -701,6 +715,6 @@ class Request(object):
"""
try:
return self._headers[name] or None
return self.env[name] or None
except KeyError:
return None

View File

@@ -51,14 +51,11 @@ def parse_query_string(query_string):
return params
def parse_headers(env):
"""Parse HTTP headers out of a WSGI environ dictionary
def normalize_headers(env):
"""Normalize HTTP headers in a WSGI environ dictionary.
Args:
env: A WSGI environ dictionary
Returns:
A dict containing (name, value) pairs, one per HTTP header
env: A WSGI environ dictionary to normalize (in-place)
Raises:
KeyError: The env dictionary did not contain a key that is required by
@@ -66,34 +63,27 @@ def parse_headers(env):
TypeError: env is not dictionary-like. In other words, it has no
attribute '__getitem__'.
"""
# Parse HTTP_*
headers = {}
for key in env:
if key.startswith('HTTP_'):
headers[key[5:]] = env[key]
# NOTE(kgriffs): Per the WSGI spec, CONTENT_TYPE and CONTENT_LENGTH
# are not under HTTP_*, and HTTP_HOST may be missing entirely, so
# we normalize all three here.
# Per the WSGI spec, Content-Type is not under HTTP_*
if 'CONTENT_TYPE' in env:
headers['CONTENT_TYPE'] = env['CONTENT_TYPE']
env['HTTP_CONTENT_TYPE'] = env['CONTENT_TYPE']
# Per the WSGI spec, Content-Length is not under HTTP_*
if 'CONTENT_LENGTH' in env:
headers['CONTENT_LENGTH'] = env['CONTENT_LENGTH']
env['HTTP_CONTENT_LENGTH'] = env['CONTENT_LENGTH']
# Fallback to SERVER_* vars if the Host header isn't specified
if 'HOST' not in headers:
if 'HTTP_HOST' not in env:
host = env['SERVER_NAME']
port = env['SERVER_PORT']
if port != '80':
host = ''.join([host, ':', port])
headers['HOST'] = host
return headers
env['HTTP_HOST'] = host
class Body(object):

View File

@@ -239,6 +239,18 @@ class TestHeaders(testing.TestBase):
actual_value = self.resource.req.get_header(name)
self.assertEqual(actual_value, expected_value)
# Verify that the `headers` property of a Request exposes the headers
# that were placed in the WSGI environ: every (name, value) pair given
# to create_environ() must appear in req.headers with the name
# upper-cased and the value unchanged.
def test_get_raw_headers(self):
headers = [
('Client-ID', '692ba466-74bb-11e3-bf3f-7567c531c7ca'),
('Accept', 'audio/*; q=0.2, audio/basic')
]
environ = testing.create_environ(headers=headers)
req = falcon.Request(environ)
# Only membership is asserted, so any additional headers present in
# req.headers do not fail the test. The expected name keeps its dashes
# and is compared in upper case.
for name, value in headers:
self.assertIn((name.upper(), value), req.headers.items())
def test_passthrough_resp_headers(self):
self.simulate_request(self.test_route)