Files
deb-python-requestbuilder/requestbuilder/service.py
Garrett Holmstrom e2589b65ec Handle redirects ourselves so they are always signed
Requests 2.3 strips Authorization headers when redirects send requests
to URLs with different DNS names, which results in requests with no
Authorization headers at all.  We address this by switching off its
internal redirect handling.

Note that the means by which this is done does not exist in requests
1.1.
2015-09-21 13:27:39 -07:00

388 lines
17 KiB
Python

# Copyright (c) 2012-2015, Eucalyptus Systems, Inc.
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
from __future__ import absolute_import
import cgi
import collections
import datetime
import functools
import io
import logging
import os.path
import random
import socket
import time
import requests.exceptions
import six
import six.moves.urllib_parse as urlparse
from requestbuilder.exceptions import (ClientError, ServerError,
ServiceInitError, TimeoutError)
from requestbuilder.mixins import RegionConfigurableMixin
class BaseService(RegionConfigurableMixin):
NAME = None
DESCRIPTION = ''
API_VERSION = ''
MAX_RETRIES = 2
TIMEOUT = 30 # socket timeout in seconds
REGION_ENVVAR = None
URL_ENVVAR = None
ARGS = []
def __init__(self, config, loglevel=None, max_retries=None, timeout=None,
**kwargs):
self.args = kwargs
self.config = config
self.endpoint = None
self.log = logging.getLogger(self.__class__.__name__)
if loglevel is not None:
self.log.level = loglevel
self.max_retries = max_retries
self.region_name = None # Note this can differ from config.region
self.session_args = {}
self.timeout = timeout
self._session = None
@classmethod
def from_other(cls, other, **kwargs):
kwargs.setdefault('loglevel', other.log.level)
kwargs.setdefault('max_retries', other.max_retries)
kwargs.setdefault('session_args', dict(other.session_args))
kwargs.setdefault('timeout', other.timeout)
if 'region' in other.args:
kwargs.setdefault('region', other.args['region'])
new = cls(other.config, **kwargs)
new.configure()
return new
def configure(self):
# Configure user and region before grabbing endpoint info since
# the latter may depend upon the former
self.update_config_view()
self.__configure_endpoint()
# Configure timeout and retry handlers
if self.max_retries is None:
config_max_retries = self.config.get_global_option('max-retries')
if config_max_retries is not None:
self.max_retries = int(config_max_retries)
else:
self.max_retries = self.MAX_RETRIES
if self.timeout is None:
config_timeout = self.config.get_global_option('timeout')
if config_timeout is not None:
self.timeout = float(config_timeout)
else:
self.timeout = self.TIMEOUT
# SSL cert verification is opt-in
self.session_args['verify'] = self.config.convert_to_bool(
self.config.get_region_option('verify-ssl'), default=False)
# requests only applies proxy config in code paths we don't use
self.session_args['proxies'] = _get_proxies()
# Ensure everything is okay and finish up
self.validate_config()
@property
def session(self):
if self._session is None:
self._session = requests.session()
for key, val in self.session_args.iteritems():
setattr(self._session, key, val)
for adapter in self._session.adapters.values():
# send_request handles retries to allow for re-signing
adapter.max_retries = 0
return self._session
def validate_config(self):
if self.endpoint is None:
if self.NAME is not None:
url_opt = '{0}-url'.format(self.NAME)
available_regions = self.config.get_all_region_options(url_opt)
if len(available_regions) > 0:
msg = ('No {0} endpoint to connect to was given. '
'Configured regions with {0} endpoints are: '
'{1}').format(self.NAME,
', '.join(sorted(available_regions)))
else:
msg = ('No {0} endpoint to connect to was given. {0} '
'endpoints may be specified in a config file with '
'"{1}".').format(self.NAME, url_opt)
else:
msg = 'No endpoint to connect to was given'
raise ServiceInitError(msg)
def get_request_url(self, method='GET', path=None, params=None,
headers=None, data=None, files=None, auth=None):
url = self.__get_url_for_path(path)
headers = dict(headers or {})
if 'host' not in [header.lower() for header in headers]:
headers['Host'] = urlparse.urlparse(self.endpoint).netloc
p_request = self.__log_and_prepare_request(method, url, params, data,
files, headers, auth)
return p_request.url
def send_request(self, method='GET', path=None, params=None, headers=None,
data=None, files=None, auth=None):
url = self.__get_url_for_path(path)
headers = dict(headers)
if 'host' not in [header.lower() for header in headers]:
headers['Host'] = urlparse.urlparse(self.endpoint).netloc
try:
max_tries = self.max_retries + 1
assert max_tries >= 1
redirects_left = 5
if isinstance(data, file) and hasattr(data, 'seek'):
# If we're redirected we need to be able to reset
data_file_offset = data.tell()
else:
data_file_offset = None
while True:
for attempt_no, delay in enumerate(
_generate_delays(max_tries), 1):
# Use exponential backoff if this is a retry
if delay > 0:
self.log.debug('will retry after %.3f seconds', delay)
time.sleep(delay)
self.log.info('sending request (attempt %i of %i)',
attempt_no, max_tries)
p_request = self.__log_and_prepare_request(
method, url, params, data, files, headers, auth)
p_request.start_time = datetime.datetime.now()
try:
# verify= works around a bug in requests < 1.2.
# See requests commit 325ea7b.
response = self.session.send(
p_request, stream=True, timeout=self.timeout,
verify=self.session_args['verify'],
allow_redirects=False)
except requests.exceptions.Timeout:
if attempt_no < max_tries:
self.log.debug('timeout', exc_info=True)
if data_file_offset is not None:
self.log.debug('re-seeking body to '
'beginning of file')
# pylint: disable=E1101
data.seek(data_file_offset)
# pylint: enable=E1101
continue
elif not hasattr(data, 'tell'):
continue
# Fallthrough -- if it has a file pointer but not
# seek we can't retry because we can't rewind.
raise
if response.status_code not in (500, 503):
break
# If it *was* in that list, retry
if (response.status_code in (301, 302, 307, 308) and
redirects_left > 0 and 'Location' in response.headers):
# Standard redirect -- we need to handle this ourselves
# because we have to re-sign requests when their URLs
# change.
redirects_left -= 1
parsed_rdr = urlparse.urlparse(
response.headers['Location'])
parsed_url = urlparse.urlparse(url)
new_url_bits = []
for rdr_bit, url_bit in zip(parsed_rdr, parsed_url):
new_url_bits.append(rdr_bit or url_bit)
if 'Host' in headers:
headers['Host'] = new_url_bits[1] # netloc
url = urlparse.urlunparse(new_url_bits)
self.log.debug('redirecting to %s (%i redirect(s) '
'remaining)', url, redirects_left)
if data_file_offset is not None:
self.log.debug('re-seeking body to beginning of file')
# pylint: disable=E1101
data.seek(data_file_offset)
# pylint: enable=E1101
continue
elif response.status_code >= 300:
# We include 30x because we've handled the standard method
# of redirecting, but the server might still be trying to
# redirect another way for some reason.
self.handle_http_error(response)
return response
except requests.exceptions.Timeout as exc:
self.log.debug('timeout', exc_info=True)
raise TimeoutError('request timed out', exc)
except requests.exceptions.ConnectionError as exc:
self.log.debug('connection error', exc_info=True)
return self.__handle_connection_error(exc)
except requests.exceptions.HTTPError as exc:
return self.handle_http_error(response)
except requests.exceptions.RequestException as exc:
self.log.debug('request error', exc_info=True)
raise ClientError(exc)
def __handle_connection_error(self, err):
if isinstance(err, six.string_types):
msg = err
elif isinstance(err, Exception) and len(err.args) > 0:
if hasattr(err.args[0], 'reason'):
msg = err.args[0].reason
elif isinstance(err.args[0], Exception):
return self.__handle_connection_error(err.args[0])
else:
msg = err.args[0]
else:
raise ClientError('connection error')
raise ClientError('connection error ({0})'.format(msg))
def handle_http_error(self, response):
self.log.debug('HTTP error', exc_info=True)
raise ServerError(response)
def __get_url_for_path(self, path):
if path:
# We can't simply use urljoin because a path might start with '/'
# like it could for S3 keys that start with that character.
if self.endpoint.endswith('/'):
return self.endpoint + path
else:
return self.endpoint + '/' + path
else:
return self.endpoint
def __log_and_prepare_request(self, method, url, params, data, files,
headers, auth):
hooks = {'response': functools.partial(_log_response_data, self.log)}
if auth:
bound_auth = auth.bind_to_service(self)
else:
bound_auth = None
request = requests.Request(method=method, url=url, params=params,
data=data, files=files, headers=headers,
auth=bound_auth)
p_request = request.prepare()
p_request.hooks = {'response': hooks['response']}
self.log.debug('request method: %s', request.method)
self.log.debug('request url: %s', p_request.url)
if isinstance(p_request.headers, (dict, collections.Mapping)):
for key, val in sorted(p_request.headers.iteritems()):
if key.lower().endswith('password'):
val = '<redacted>'
self.log.debug('request header: %s: %s', key, val)
if isinstance(request.params, (dict, collections.Mapping)):
for key, val in sorted(urlparse.parse_qsl(
urlparse.urlparse(p_request.url).query)):
if key.lower().endswith('password'):
val = '<redacted>'
self.log.debug('request param: %s: %s', key, val)
if isinstance(request.data, (dict, collections.Mapping)):
content_type, content_type_params = cgi.parse_header(
p_request.headers.get('content-type') or '')
if content_type == 'multipart/form-data':
data = cgi.parse_multipart(io.BytesIO(p_request.body),
content_type_params)
elif content_type == 'application/x-www-form-urlencoded':
data = dict(urlparse.parse_qsl(p_request.body))
else:
data = request.data
for key, val in sorted(data.items()):
if key in (request.files or {}):
# We probably don't want to include the contents of
# entire files in debug output.
continue
if key.lower().endswith('password'):
val = '<redacted>'
self.log.debug('request data: %s: %s', key, val)
if isinstance(request.files, (dict, collections.Mapping)):
for key, val in sorted(request.files.iteritems()):
if hasattr(val, '__len__'):
val = '<{0} bytes>'.format(len(val))
self.log.debug('request file: %s: %s', key, val)
return p_request
def __configure_endpoint(self):
# self.args gets highest precedence
if self.args.get('url'):
url, region_name = _parse_endpoint_url(self.args['url'])
# Environment comes next
elif os.getenv(self.URL_ENVVAR):
url, region_name = _parse_endpoint_url(os.getenv(self.URL_ENVVAR))
# Try the config file
elif self.NAME:
url, section = self.config.get_region_option2(self.NAME + '-url')
if section:
# Check to see if the region name is explicitly specified
region_name = self.config.get_region_option('name', section)
if region_name is None:
# If it isn't then just grab the end of the section name
region_name = section.rsplit(':', 1)[-1]
else:
region_name = None
self.endpoint = url
self.region_name = region_name
def _log_response_data(logger, response, **_):
if hasattr(response.request, 'start_time'):
duration = datetime.datetime.now() - response.request.start_time
logger.debug('response time: %i.%03i seconds', duration.seconds,
duration.microseconds // 1000)
if response.status_code >= 400:
logger.error('response status: %i', response.status_code)
else:
logger.info('response status: %i', response.status_code)
if isinstance(response.headers, (dict, collections.Mapping)):
for key, val in sorted(response.headers.items()):
logger.debug('response header: %s: %s', key, val)
def _generate_delays(max_tries):
if max_tries >= 1:
yield 0
for retry_no in range(1, max_tries):
next_delay = (random.random() + 1) * 2 ** (retry_no - 1)
yield min((next_delay, 15))
def _parse_endpoint_url(urlish):
"""
If given a URL, return the URL and None. If given a URL with a string and
"::" prepended to it, return the URL and the prepended string. This is
meant to give one a means to supply a region name via arguments and
variables that normally only accept URLs.
"""
if '::' in urlish:
region, url = urlish.split('::', 1)
else:
region = None
url = urlish
return url, region
def _get_proxies():
try:
bypass = six.moves.urllib.request.proxy_bypass()
except (TypeError, socket.gaierror):
# This blows up on my old OS X machine
bypass = False
if bypass:
return {}
return six.moves.urllib.request.getproxies()