Add oslo admin-assistant bot

This bot can currently do the following:

* Periodically report on the status of the periodic jobs, in either
  simple or verbose mode.
* Find a project's meeting notes.
* Run in IRC (it should also work in Slack or any other
  errbot-supported backend).

Change-Id: I5df76ecd1352d4e3fb2a5a238163cf65257fc440

commit 5b1f6f6b47
parent 2b97634a01

oslobot/README.rst (new file, 26 lines)

==============
Oslo admin bot
==============

A tiny bot that will help in doing some periodic checks and
other oslo (the project) activities, such as stating when the weekly
meetings are and (periodically) checking (and reporting on) the periodic
oslo jobs, along with various other nifty functionality that we can think
of in the future (if only we *all* had an administrative assistant).

How to use it
=============

0. Read up on `errbot`_
1. Set up a `virtualenv`_
2. Enter that `virtualenv`_
3. Install the ``requirements.txt`` into that virtualenv, typically
   performed via ``pip install -r requirements.txt`` after doing
   ``pip install pip --upgrade`` to get a newer pip package.
4. Adjust ``config.py`` and provide it a valid (and unique) IRC
   nickname and admins.
5. Run ``errbot -d -p $PWD/oslobot.pid``
6. Profit.

.. _virtualenv: https://virtualenv.pypa.io/en/stable/
.. _errbot: http://errbot.io/

oslobot/config.py (new file, 50 lines)

import logging
import os

BOT_DATA_DIR = os.path.join(os.getcwd(), 'data')
BOT_EXTRA_PLUGIN_DIR = os.path.join(os.getcwd(), 'plugins')

# We aren't really using this feature, so memory is fine.
STORAGE = 'Memory'

BOT_LOG_FILE = None
BOT_LOG_LEVEL = logging.DEBUG

# The admins that can send the bot special commands...
#
# For now just taken from current oslo-core list (we should make
# this more dynamic in the future, perhaps reading the list from
# gerrit on startup?).
BOT_ADMINS = [
    'bknudson',
    'bnemec',
    'dhellmann',
    'dims',
    'flaper87',
    'gcb',
    'harlowja',
    'haypo',
    'jd__',
    'lifeless',
    'lxsli',
    'mikal',
    'Nakato',
    # TODO(harlowja): Does case matter?
    'nakato',
    'rbradfor',
    'sileht',
]

# The following will change depending on the backend selected...
BACKEND = 'IRC'
BOT_IDENTITY = {
    'server': 'chat.freenode.net',
    'nickname': 'oslobot',
}
COMPACT_OUTPUT = False
CORE_PLUGINS = ('ACLs', 'Help', 'Health', 'Plugins', 'ChatRoom')

# Rooms we will join by default.
CHATROOM_PRESENCE = [
    '#openstack-oslo',
]
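
The TODO above (reading the admin list from gerrit on startup) could be done
against the Gerrit REST API. A minimal sketch, assuming the group is named
"oslo-core" on review.openstack.org, that listing its members requires no
authentication, and that the usual ")]}'" anti-XSSI prefix is present (none
of this is wired up by this change):

    import json

    import requests


    def fetch_oslo_core_members(base_url='https://review.openstack.org'):
        # Gerrit prefixes JSON bodies with ")]}'" on its own line to
        # defeat XSSI, so strip the first line before decoding.
        resp = requests.get(base_url + '/groups/oslo-core/members/',
                            timeout=10)
        resp.raise_for_status()
        accounts = json.loads(resp.text.split('\n', 1)[1])
        return sorted(a['username'] for a in accounts if 'username' in a)

    # e.g.: BOT_ADMINS = fetch_oslo_core_members()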

oslobot/plugins/oslobot/oslobot.plug (new file, 10 lines)

[Core]
Name = oslobot
Module = oslobot

[Documentation]
Description = This plugin is the go-to bot for oslo admin-assistant activities.

[Python]
Version = 2+

oslobot/plugins/oslobot/oslobot.py (new file, 413 lines)

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import
from __future__ import unicode_literals

import collections
import copy
from datetime import date
import json
import re

from concurrent import futures
import six
from six.moves import http_client

from dateutil import parser
from dateutil.relativedelta import relativedelta

import feedparser
from oslo_utils import timeutils
import requests
from six.moves.urllib.parse import urlencode as compat_urlencode
from tabulate import tabulate

from errbot import botcmd
from errbot import BotPlugin

BAD_VALUE = '??'
NA_VALUE = "N/A"


def str_split(text):
    return text.split()


class GoogleShortener(object):
    """Shortener that uses google shortening service (requires api key).

    See: https://developers.google.com/url-shortener/v1/
    """

    base_request_url = 'https://www.googleapis.com/urlshortener/v1/url?'

    def __init__(self, log, api_key,
                 timeout=None, cache=None):
        self.log = log
        self.api_key = api_key
        self.timeout = timeout
        if cache is None:
            cache = {}
        self.cache = cache

    def safe_shorten(self, long_url):
        try:
            return self.shorten(long_url)
        except (IOError, ValueError, TypeError):
            self.log.exception("Failed shortening due to unexpected error,"
                               " providing back long url.")
            return long_url

    def shorten(self, long_url):
        if long_url in self.cache:
            return self.cache[long_url]
        post_data = json.dumps({
            'longUrl': long_url,
        })
        query_params = {
            'key': self.api_key,
        }
        req_url = self.base_request_url + compat_urlencode(query_params)
        try:
            req = requests.post(req_url, data=post_data,
                                headers={'content-type': 'application/json'},
                                timeout=self.timeout)
        except requests.Timeout:
            raise IOError("Unable to shorten '%s' url"
                          " due to http request timeout being"
                          " reached" % (long_url))
        else:
            if req.status_code != http_client.OK:
                raise IOError("Unable to shorten '%s' url due to http"
                              " error '%s' (%s)" % (long_url, req.reason,
                                                    req.status_code))
            try:
                small_url = req.json()['id']
                self.cache[long_url] = small_url
                return small_url
            except (KeyError, ValueError, TypeError) as e:
                raise IOError("Unable to shorten '%s' url due to request"
                              " extraction error: %s" % (long_url, e))
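
# A hypothetical usage sketch (nothing below runs as part of the bot; the
# api key is a placeholder and a real one, plus network access, would be
# needed):
#
#   shortener = GoogleShortener(logging.getLogger(__name__), 'MY_API_KEY',
#                               timeout=(1.0, 5.0))
#   print(shortener.safe_shorten('http://health.openstack.org/'))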


class OsloBotPlugin(BotPlugin):
    OS_START_YEAR = 2010
    DEF_FETCH_WORKERS = 3
    DEF_CONFIG = {
        # How often (in seconds) to check the periodic jobs; the jobs
        # currently run daily, so checking more often than every
        # 24 hours isn't too useful...
        #
        # If it is negative or zero (the default), then never do it...
        'periodic_check_frequency': -1,
        'periodic_python_versions': [(3, 4), (2, 7)],
        # TODO(harlowja): fetch this from the webpage itself?
        'periodic_project_names': [
            'ceilometer',
            'cinder',
            'cue',
            'glance',
            'heat',
            'ironic',
            'keystone',
            'murano',
            'neutron',
            'nova',
            'octavia',
            'trove',
        ],
        'periodic_shorten': False,
        'periodic_build_name_tpl': ('periodic-%(project_name)s-%(py_version)s'
                                    '-with-oslo-master'),
        # Fetch timeout for trying to get a project's health rss
        # url (this seems like it needs to be somewhat high, as the
        # infra system serving it does not always appear healthy).
        'periodic_fetch_timeout': 30.0,
        'periodic_connect_timeout': 1.0,
        'periodic_url_tpl': ("http://health.openstack.org/runs/key/"
                             "build_name/%(build_name)s/recent/rss"),
        # See: https://pypi.python.org/pypi/tabulate
        'tabulate_format': 'plain',
        'periodic_exclude_when': {
            # Exclude failure results that are more than X months old...
            #
            # See: https://dateutil.readthedocs.io/en/stable/relativedelta.html
            'months': -1,
        },
        'meeting_team': 'oslo',
        'meeting_url_tpl': ("http://eavesdrop.openstack.org"
                            "/meetings/%(team)s/%(year)s/"),
        'meeting_fetch_timeout': 10.0,
        # Required if shortening is enabled; see
        # https://developers.google.com/url-shortener/v1/getting_started#APIKey
        'shortener_api_key': "",
        'shortener_fetch_timeout': 5.0,
        'shortener_connect_timeout': 1.0,
    }
    """
    The configuration mechanism for errbot is sorta unique so
    check over
    http://errbot.io/en/latest/user_guide/administration.html#configuration
    before diving too deep here.
    """
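
    # As a sketch of how that surfaces in practice (the exact command
    # syntax is an errbot-version-dependent assumption), a bot admin
    # could override one of the defaults from chat with something like:
    #
    #   !plugin config oslobot {'periodic_check_frequency': 86400}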

    def configure(self, configuration):
        # Start from the defaults and layer any provided values on top
        # (and not the other way around), so admin-set values win.
        config = copy.deepcopy(self.DEF_CONFIG)
        if configuration:
            config.update(configuration)
        super(OsloBotPlugin, self).configure(config)
        self.log.debug("Bot configuration: %s", self.config)
        self.executor = None

    @botcmd(split_args_with=str_split, historize=False)
    def meeting_notes(self, msg, args):
        """Returns the latest project meeting notes url."""
        def extract_meeting_url(team, meeting_url, resp):
            if resp.status_code != http_client.OK:
                return None
            matches = []
            # Crappy html parsing...
            for m in re.findall("(%s.+?[.]html)" % team, resp.text):
                if m.endswith(".log.html"):
                    continue
                if m not in matches:
                    matches.append(m)
            if matches:
                return meeting_url + matches[-1]
            return None
        self.log.debug("Got request to fetch url"
                       " to last meeting notes from '%s'"
                       " with args %s", msg.frm, args)
        now_year = date.today().year
        if args:
            meeting_url_data = {
                'team': args[0],
            }
        else:
            meeting_url_data = {
                'team': self.config['meeting_team'],
            }
        # No meeting should happen before openstack even existed...
        valid_meeting_url = None
        while now_year >= self.OS_START_YEAR:
            meeting_url_data['year'] = now_year
            meeting_url = self.config['meeting_url_tpl'] % meeting_url_data
            try:
                resp = requests.get(
                    meeting_url, timeout=self.config['meeting_fetch_timeout'])
            except requests.Timeout:
                # Bail immediately...
                break
            else:
                valid_meeting_url = extract_meeting_url(
                    meeting_url_data['team'], meeting_url, resp)
                if valid_meeting_url:
                    self.log.debug("Found valid last meeting url at %s for"
                                   " team %s", valid_meeting_url,
                                   meeting_url_data['team'])
                    break
                else:
                    now_year -= 1
        if valid_meeting_url:
            content = "Last meeting url is %s" % valid_meeting_url
        else:
            content = ("Could not find meeting"
                       " url for project %s" % meeting_url_data['team'])
        self.send_public_or_private(msg, content, 'meeting notes')
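
    # For reference, the eavesdrop index page fetched above lives at a
    # URL like http://eavesdrop.openstack.org/meetings/oslo/2016/ and
    # the minutes filenames matched look roughly like
    # "oslo.2016-01-04-16.00.html" (the "*.log.html" variants are the
    # full logs, which is why those get skipped); the naming here is an
    # observation about eavesdrop, not something this code controls.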

    def send_public_or_private(self, source_msg, content, kind):
        if hasattr(source_msg.frm, 'room') and source_msg.is_group:
            self.send(source_msg.frm.room, content)
        elif source_msg.is_direct:
            self.send(source_msg.frm, content)
        else:
            self.log.warn("No recipient targeted for %s request!", kind)

    @botcmd(split_args_with=str_split, historize=False)
    def check_periodics(self, msg, args):
        """Returns current periodic job(s) status."""
        self.log.debug("Got request to check periodic"
                       " jobs from '%s' with args %s", msg.frm, args)
        self.send_public_or_private(
            msg, self.fetch_periodics_table(project_names=args), 'check')
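
    # In chat this surfaces as "!check periodics" (optionally followed
    # by one or more project names, e.g. "!check periodics nova"), since
    # errbot derives command names from method names; the exact prefix
    # and spelling accepted depend on the errbot version and
    # configuration in use.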

    def report_on_feeds(self):
        msg = self.fetch_periodics_table()
        for room in self.rooms():
            self.send(room, msg)

    def fetch_periodics_table(self, project_names=None):
        if not project_names:
            project_names = list(self.config['periodic_project_names'])

        def process_feed(feed):
            cleaned_entries = []
            if self.config['periodic_exclude_when']:
                now = timeutils.utcnow(with_timezone=True)
                expire_after = now + relativedelta(
                    **self.config['periodic_exclude_when'])
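                # For example, with the default {'months': -1} this
                # works out to "now minus one month", so entries
                # published at or before that instant are treated as
                # stale and skipped in the loop below.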
                for e in feed.entries:
                    when_pub = parser.parse(e.published)
                    if when_pub <= expire_after:
                        continue
                    cleaned_entries.append(e)
            else:
                cleaned_entries.extend(feed.entries)
            if len(cleaned_entries) == 0:
                return {
                    'status': 'All OK (no recent failures)',
                    'discarded': len(feed.entries) - len(cleaned_entries),
                    'last_fail': NA_VALUE,
                    'last_fail_url': NA_VALUE,
                }
            else:
                fails = []
                for e in cleaned_entries:
                    fails.append((e, parser.parse(e.published)))
                latest_entry, latest_fail = sorted(
                    fails, key=lambda e: e[1])[-1]
                if latest_fail.tzinfo is not None:
                    when = latest_fail.strftime(
                        "%A %b %e, %Y at %k:%M:%S %Z")
                else:
                    when = latest_fail.strftime("%A %b %e, %Y at %k:%M:%S")
                return {
                    'status': "%s failures" % len(fails),
                    'last_fail': when,
                    'last_fail_url': latest_entry.get("link", BAD_VALUE),
                    'discarded': len(feed.entries) - len(cleaned_entries),
                }

        def process_req_completion(fut):
            self.log.debug("Processing completion of '%s'", fut.rss_url)
            try:
                r = fut.result()
            except requests.Timeout:
                return {
                    'status': 'Fetch timed out',
                    'last_fail': BAD_VALUE,
                    'last_fail_url': BAD_VALUE,
                }
            except futures.CancelledError:
                return {
                    'status': 'Fetch cancelled',
                    'last_fail': BAD_VALUE,
                    'last_fail_url': BAD_VALUE,
                }
            except Exception:
                self.log.exception("Failed fetching!")
                return {
                    'status': 'Unknown fetch error',
                    'last_fail': BAD_VALUE,
                    'last_fail_url': BAD_VALUE,
                }
            else:
                if (r.status_code == http_client.BAD_REQUEST
                        and 'No Failed Runs' in r.text):
                    return {
                        'status': 'All OK (no recent failures)',
                        'last_fail': NA_VALUE,
                        'last_fail_url': NA_VALUE,
                    }
                elif r.status_code != http_client.OK:
                    return {
                        'status': 'Fetch failure (%s)' % r.reason,
                        'last_fail': BAD_VALUE,
                        'last_fail_url': BAD_VALUE,
                    }
                else:
                    return process_feed(feedparser.parse(r.text))

        rss_url_tpl = self.config['periodic_url_tpl']
        build_name_tpl = self.config['periodic_build_name_tpl']
        conn_kwargs = {
            'timeout': (self.config['periodic_connect_timeout'],
                        self.config['periodic_fetch_timeout']),
        }
        futs = set()
        for project_name in project_names:
            for py_ver in self.config['periodic_python_versions']:
                py_ver_str = "py" + "".join(str(p) for p in py_ver)
                build_name = build_name_tpl % {
                    'project_name': project_name,
                    'py_version': py_ver_str}
                rss_url = rss_url_tpl % {'build_name': build_name}
                self.log.debug("Scheduling call out to %s", rss_url)
                fut = self.executor.submit(requests.get,
                                           rss_url, **conn_kwargs)
                # TODO(harlowja): don't touch the future class and
                # do this in a more sane manner at some point...
                fut.rss_url = rss_url
                fut.build_name = build_name
                fut.project_name = project_name
                fut.py_version = py_ver
                futs.add(fut)
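                # One saner shape for the TODO above (a sketch, not what
                # this change does) would be a dict keyed by future,
                # e.g. futs[fut] = (project_name, py_ver, rss_url), so
                # no attributes get grafted onto Future instances.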
        self.log.debug("Waiting for %s fetch requests", len(futs))
        results = collections.defaultdict(list)
        for fut in futures.as_completed(futs):
            results[fut.project_name].append(
                (fut.py_version, process_req_completion(fut)))
        tbl_headers = [
            "Project",
            "Status",
            'Last failed',
            "Last failed url",
            'Discarded',
        ]
        tbl_body = []
        if (self.config['periodic_shorten'] and
                self.config['shortener_api_key']):
            s = GoogleShortener(
                self.log, self.config['shortener_api_key'],
                timeout=(self.config['shortener_connect_timeout'],
                         self.config['shortener_fetch_timeout']))
            shorten_func = s.safe_shorten
        else:
            def shorten_func(long_url):
                return long_url
        for project_name in sorted(results.keys()):
            # This should force sorting by python version...
            for py_version, result in sorted(results[project_name]):
                py_version = ".".join(str(p) for p in py_version)
                last_fail_url = result['last_fail_url']
                if (last_fail_url and
                        last_fail_url not in [BAD_VALUE, NA_VALUE]):
                    last_fail_url = shorten_func(last_fail_url)
                tbl_body.append([
                    project_name.title() + " (" + py_version + ")",
                    result['status'],
                    result['last_fail'],
                    str(last_fail_url),
                    str(result.get('discarded', 0)),
                ])
        buf = six.StringIO()
        buf.write(tabulate(tbl_body, tbl_headers,
                           tablefmt=self.config['tabulate_format']))
        return buf.getvalue()

    def get_configuration_template(self):
        return copy.deepcopy(self.DEF_CONFIG)

    def deactivate(self):
        super(OsloBotPlugin, self).deactivate()
        if self.executor is not None:
            self.executor.shutdown()

    def activate(self):
        super(OsloBotPlugin, self).activate()
        self.executor = futures.ThreadPoolExecutor(
            max_workers=self.DEF_FETCH_WORKERS)
        try:
            if self.config['periodic_check_frequency'] > 0:
                self.start_poller(self.config['periodic_check_frequency'],
                                  self.report_on_feeds)
        except KeyError:
            pass

oslobot/requirements.txt (new file, 10 lines)

errbot
errbot[irc]
six
irc
python-dateutil
requests
oslo.utils
tabulate
feedparser
futures>=3.0;python_version=='2.7' or python_version=='2.6' # BSD