Use requests lib instead of low-level urllib calls
With the requests library, users can run stackalytics-processor from environments located behind an HTTP/HTTPS proxy. The proxy address is configured by setting the HTTP_PROXY or HTTPS_PROXY environment variable. Closes-Bug: #1351136 Change-Id: I6a65afb0f99b351dc2183294d9127cbbebc35856
This commit is contained in:
@@ -16,5 +16,7 @@ psutil<2.0.0,>=1.1.1
|
|||||||
PyGithub
|
PyGithub
|
||||||
python-memcached>=1.56
|
python-memcached>=1.56
|
||||||
PyYAML>=3.1.0
|
PyYAML>=3.1.0
|
||||||
|
requests>=2.5.2
|
||||||
|
requests-file
|
||||||
sh
|
sh
|
||||||
six>=1.9.0
|
six>=1.9.0
|
||||||
|
@@ -15,8 +15,6 @@
|
|||||||
|
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
import six
|
import six
|
||||||
from six.moves import http_client
|
|
||||||
from six.moves.urllib import parse
|
|
||||||
|
|
||||||
from stackalytics.processor import utils
|
from stackalytics.processor import utils
|
||||||
|
|
||||||
@@ -54,14 +52,11 @@ def lp_profile_by_email(email):
|
|||||||
|
|
||||||
def lp_module_exists(module):
|
def lp_module_exists(module):
|
||||||
uri = LP_URI_DEVEL % module
|
uri = LP_URI_DEVEL % module
|
||||||
parsed_uri = parse.urlparse(uri)
|
request = utils.do_request(uri)
|
||||||
conn = http_client.HTTPConnection(parsed_uri.netloc)
|
|
||||||
conn.request('GET', parsed_uri.path)
|
|
||||||
res = conn.getresponse()
|
|
||||||
LOG.debug('Checked uri: %(uri)s, status: %(status)s',
|
LOG.debug('Checked uri: %(uri)s, status: %(status)s',
|
||||||
{'uri': uri, 'status': res.status})
|
{'uri': uri, 'status': request.status_code})
|
||||||
conn.close()
|
return request.status_code != 404
|
||||||
return res.status != 404
|
|
||||||
|
|
||||||
|
|
||||||
def lp_blueprint_generator(module):
|
def lp_blueprint_generator(module):
|
||||||
|
@@ -18,7 +18,6 @@ import re
|
|||||||
|
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
import six
|
import six
|
||||||
from six.moves import http_client
|
|
||||||
from six.moves.urllib import parse
|
from six.moves.urllib import parse
|
||||||
|
|
||||||
from stackalytics.processor import utils
|
from stackalytics.processor import utils
|
||||||
@@ -60,31 +59,25 @@ def _get_mail_archive_links(uri):
|
|||||||
return [parse.urljoin(uri, link) for link in links]
|
return [parse.urljoin(uri, link) for link in links]
|
||||||
|
|
||||||
|
|
||||||
def _link_content_changed(link, runtime_storage_inst):
|
def _uri_content_changed(uri, runtime_storage_inst):
|
||||||
LOG.debug('Check changes for mail archive located at uri: %s', link)
|
LOG.debug('Check changes for mail archive located at: %s', uri)
|
||||||
parsed_uri = parse.urlparse(link)
|
last_modified = utils.get_uri_last_modified(uri)
|
||||||
conn = http_client.HTTPConnection(parsed_uri.netloc)
|
|
||||||
conn.request('HEAD', parsed_uri.path)
|
|
||||||
res = conn.getresponse()
|
|
||||||
last_modified = res.getheader('last-modified')
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
if last_modified != runtime_storage_inst.get_by_key('mail_link:' + link):
|
if last_modified != runtime_storage_inst.get_by_key('mail_link:' + uri):
|
||||||
LOG.debug('Mail archive changed, last modified at: %s', last_modified)
|
LOG.debug('Mail archive changed, last modified at: %s', last_modified)
|
||||||
runtime_storage_inst.set_by_key('mail_link:' + link, last_modified)
|
runtime_storage_inst.set_by_key('mail_link:' + uri, last_modified)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _retrieve_mails(uri):
|
def _retrieve_mails(uri):
|
||||||
LOG.debug('Retrieving mail archive from uri: %s', uri)
|
LOG.debug('Retrieving mail archive from: %s', uri)
|
||||||
content = utils.read_uri(uri)
|
content = utils.read_gzip_from_uri(uri)
|
||||||
if not content:
|
if not content:
|
||||||
LOG.error('Error reading mail archive from uri: %s', uri)
|
LOG.error('Error reading mail archive from: %s', uri)
|
||||||
return
|
return
|
||||||
|
|
||||||
content = utils.gzip_decompress(content)
|
|
||||||
LOG.debug('Mail archive is loaded, start processing')
|
LOG.debug('Mail archive is loaded, start processing')
|
||||||
|
|
||||||
content += TRAILING_RECORD
|
content += TRAILING_RECORD
|
||||||
@@ -116,7 +109,7 @@ def log(uri, runtime_storage_inst):
|
|||||||
|
|
||||||
links = _get_mail_archive_links(uri)
|
links = _get_mail_archive_links(uri)
|
||||||
for link in links:
|
for link in links:
|
||||||
if _link_content_changed(link, runtime_storage_inst):
|
if _uri_content_changed(link, runtime_storage_inst):
|
||||||
for mail in _retrieve_mails(link):
|
for mail in _retrieve_mails(link):
|
||||||
LOG.debug('New mail: %s', mail['message_id'])
|
LOG.debug('New mail: %s', mail['message_id'])
|
||||||
yield mail
|
yield mail
|
||||||
|
@@ -17,8 +17,6 @@ import calendar
|
|||||||
import cgi
|
import cgi
|
||||||
import datetime
|
import datetime
|
||||||
import gzip
|
import gzip
|
||||||
import io
|
|
||||||
import json
|
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
@@ -26,6 +24,8 @@ import time
|
|||||||
import iso8601
|
import iso8601
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
|
import requests
|
||||||
|
import requests_file
|
||||||
import six
|
import six
|
||||||
|
|
||||||
|
|
||||||
@@ -117,30 +117,31 @@ user_agents = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def do_request(uri, method='get'):
|
||||||
|
with requests.Session() as session:
|
||||||
|
session.mount('file://', requests_file.FileAdapter())
|
||||||
|
user_agent = random.choice(user_agents)
|
||||||
|
|
||||||
|
return session.request(method, uri, headers={'User-Agent': user_agent})
|
||||||
|
|
||||||
|
|
||||||
def read_uri(uri):
|
def read_uri(uri):
|
||||||
try:
|
try:
|
||||||
req = six.moves.urllib.request.Request(
|
return do_request(uri).text
|
||||||
url=uri, headers={'User-Agent': random.choice(user_agents)})
|
|
||||||
fd = six.moves.urllib.request.urlopen(req)
|
|
||||||
if six.PY3:
|
|
||||||
fd = io.TextIOWrapper(fd)
|
|
||||||
raw = fd.read()
|
|
||||||
fd.close()
|
|
||||||
return raw
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.warn('Error "%(error)s" while reading uri %(uri)s',
|
LOG.warn('Error "%(error)s" retrieving uri %(uri)s',
|
||||||
{'error': e, 'uri': uri})
|
{'error': e, 'uri': uri})
|
||||||
|
|
||||||
|
|
||||||
def read_json_from_uri(uri):
|
def read_json_from_uri(uri):
|
||||||
try:
|
try:
|
||||||
return json.loads(read_uri(uri))
|
return do_request(uri).json()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.warn('Error "%(error)s" parsing json from uri %(uri)s',
|
LOG.warn('Error "%(error)s" parsing json from uri %(uri)s',
|
||||||
{'error': e, 'uri': uri})
|
{'error': e, 'uri': uri})
|
||||||
|
|
||||||
|
|
||||||
def gzip_decompress(content):
|
def _gzip_decompress(content):
|
||||||
if six.PY3:
|
if six.PY3:
|
||||||
return gzip.decompress(content).decode('utf8')
|
return gzip.decompress(content).decode('utf8')
|
||||||
else:
|
else:
|
||||||
@@ -148,6 +149,22 @@ def gzip_decompress(content):
|
|||||||
return gzip_fd.read()
|
return gzip_fd.read()
|
||||||
|
|
||||||
|
|
||||||
|
def read_gzip_from_uri(uri):
|
||||||
|
try:
|
||||||
|
return _gzip_decompress(do_request(uri).content)
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warn('Error "%(error)s" retrieving uri %(uri)s',
|
||||||
|
{'error': e, 'uri': uri})
|
||||||
|
|
||||||
|
|
||||||
|
def get_uri_last_modified(uri):
|
||||||
|
try:
|
||||||
|
return do_request(uri, method='head').headers['last-modified']
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warn('Error "%(error)s" retrieving uri %(uri)s',
|
||||||
|
{'error': e, 'uri': uri})
|
||||||
|
|
||||||
|
|
||||||
def cmp_to_key(mycmp): # ported from python 3
|
def cmp_to_key(mycmp): # ported from python 3
|
||||||
"""Convert a cmp= function into a key= function."""
|
"""Convert a cmp= function into a key= function."""
|
||||||
class K(object):
|
class K(object):
|
||||||
|
Reference in New Issue
Block a user