Ensure single instance for active event gathering
Active event gathering uses leader election to ensure that there is only one instance at a time receiving events. Change-Id: Ib9e095430fa82be2327dcf7fd01ee4275b17415f
This commit is contained in:
parent
7397d030ed
commit
ebcbb544be
|
@ -42,7 +42,7 @@ from zuul.driver.gerrit.gerritmodel import GerritChange, GerritTriggerEvent
|
|||
from zuul.driver.git.gitwatcher import GitWatcher
|
||||
from zuul.lib.logutil import get_annotated_logger
|
||||
from zuul.model import Ref, Tag, Branch, Project
|
||||
from zuul.zk.event_queues import ConnectionEventQueue
|
||||
from zuul.zk.event_queues import ConnectionEventQueue, EventReceiverElection
|
||||
|
||||
# HTTP timeout in seconds
|
||||
TIMEOUT = 30
|
||||
|
@ -335,6 +335,11 @@ class GerritWatcher(threading.Thread):
|
|||
self.hostname = hostname
|
||||
self.port = port
|
||||
self.gerrit_connection = gerrit_connection
|
||||
self._stop_event = threading.Event()
|
||||
self.watcher_election = EventReceiverElection(
|
||||
gerrit_connection.sched.zk_client,
|
||||
gerrit_connection.connection_name,
|
||||
"watcher")
|
||||
self.keepalive = keepalive
|
||||
self._stopped = False
|
||||
|
||||
|
@ -392,10 +397,6 @@ class GerritWatcher(threading.Thread):
|
|||
|
||||
if ret and ret not in [-1, 130]:
|
||||
raise Exception("Gerrit error executing stream-events")
|
||||
except Exception:
|
||||
self.log.exception("Exception on ssh event stream with %s:",
|
||||
self.gerrit_connection.connection_name)
|
||||
time.sleep(5)
|
||||
finally:
|
||||
# If we don't close on exceptions to connect we can leak the
|
||||
# connection and DoS Gerrit.
|
||||
|
@ -403,11 +404,18 @@ class GerritWatcher(threading.Thread):
|
|||
|
||||
def run(self):
|
||||
while not self._stopped:
|
||||
self._run()
|
||||
try:
|
||||
self.watcher_election.run(self._run)
|
||||
except Exception:
|
||||
self.log.exception("Exception on ssh event stream with %s:",
|
||||
self.gerrit_connection.connection_name)
|
||||
self._stop_event.wait(5)
|
||||
|
||||
def stop(self):
|
||||
self.log.debug("Stopping watcher")
|
||||
self._stopped = True
|
||||
self._stop_event.set()
|
||||
self.watcher_election.cancel(self._run)
|
||||
|
||||
|
||||
class GerritPoller(threading.Thread):
|
||||
|
@ -419,6 +427,8 @@ class GerritPoller(threading.Thread):
|
|||
threading.Thread.__init__(self)
|
||||
self.connection = connection
|
||||
self.last_merged_poll = 0
|
||||
self.poller_election = EventReceiverElection(
|
||||
connection.sched.zk_client, connection.connection_name, "poller")
|
||||
self._stopped = False
|
||||
self._stop_event = threading.Event()
|
||||
|
||||
|
@ -449,43 +459,30 @@ class GerritPoller(threading.Thread):
|
|||
}}
|
||||
|
||||
def _poll_checkers(self):
|
||||
try:
|
||||
for checker in self.connection.watched_checkers:
|
||||
changes = self.connection.get(
|
||||
'plugins/checks/checks.pending/?'
|
||||
'query=checker:%s+(state:NOT_STARTED)' % checker)
|
||||
for change in changes:
|
||||
for uuid, check in change['pending_checks'].items():
|
||||
event = self._makePendingCheckEvent(
|
||||
change, uuid, check)
|
||||
self.connection.addEvent(event)
|
||||
except Exception:
|
||||
self.log.exception("Exception on Gerrit poll with %s:",
|
||||
self.connection.connection_name)
|
||||
for checker in self.connection.watched_checkers:
|
||||
changes = self.connection.get(
|
||||
'plugins/checks/checks.pending/?'
|
||||
'query=checker:%s+(state:NOT_STARTED)' % checker)
|
||||
for change in changes:
|
||||
for uuid, check in change['pending_checks'].items():
|
||||
event = self._makePendingCheckEvent(
|
||||
change, uuid, check)
|
||||
self.connection.addEvent(event)
|
||||
|
||||
def _poll_merged_changes(self):
|
||||
try:
|
||||
now = datetime.datetime.utcnow()
|
||||
age = self.last_merged_poll
|
||||
if age:
|
||||
# Allow an extra 4 seconds for request time
|
||||
age = int(math.ceil((now - age).total_seconds())) + 4
|
||||
changes = self.connection.simpleQueryHTTP(
|
||||
"status:merged -age:%ss" % (age,))
|
||||
self.last_merged_poll = now
|
||||
for change in changes:
|
||||
event = self._makeChangeMergedEvent(change)
|
||||
self.connection.addEvent(event)
|
||||
except Exception:
|
||||
self.log.exception("Exception on Gerrit poll with %s:",
|
||||
self.connection.connection_name)
|
||||
now = datetime.datetime.utcnow()
|
||||
age = self.last_merged_poll
|
||||
if age:
|
||||
# Allow an extra 4 seconds for request time
|
||||
age = int(math.ceil((now - age).total_seconds())) + 4
|
||||
changes = self.connection.simpleQueryHTTP(
|
||||
"status:merged -age:%ss" % (age,))
|
||||
self.last_merged_poll = now
|
||||
for change in changes:
|
||||
event = self._makeChangeMergedEvent(change)
|
||||
self.connection.addEvent(event)
|
||||
|
||||
def _run(self):
|
||||
self._poll_checkers()
|
||||
if not self.connection.enable_stream_events:
|
||||
self._poll_merged_changes()
|
||||
|
||||
def run(self):
|
||||
last_start = time.time()
|
||||
while not self._stopped:
|
||||
next_start = last_start + self.poll_interval
|
||||
|
@ -493,12 +490,24 @@ class GerritPoller(threading.Thread):
|
|||
if self._stopped:
|
||||
break
|
||||
last_start = time.time()
|
||||
self._run()
|
||||
|
||||
self._poll_checkers()
|
||||
if not self.connection.enable_stream_events:
|
||||
self._poll_merged_changes()
|
||||
|
||||
def run(self):
|
||||
while not self._stopped:
|
||||
try:
|
||||
self.poller_election.run(self._run)
|
||||
except Exception:
|
||||
self.log.exception("Exception on Gerrit poll with %s:",
|
||||
self.connection.connection_name)
|
||||
|
||||
def stop(self):
|
||||
self.log.debug("Stopping watcher")
|
||||
self._stopped = True
|
||||
self._stop_event.set()
|
||||
self.poller_election.cancel()
|
||||
|
||||
|
||||
class GerritConnection(BaseConnection):
|
||||
|
@ -1540,7 +1549,8 @@ class GerritConnection(BaseConnection):
|
|||
self,
|
||||
self.baseurl,
|
||||
self.ref_watcher_poll_interval,
|
||||
self.refWatcherCallback)
|
||||
self.refWatcherCallback,
|
||||
election_name="ref-watcher")
|
||||
self.ref_watcher_thread.start()
|
||||
|
||||
def _stop_event_connector(self):
|
||||
|
|
|
@ -17,18 +17,19 @@
|
|||
import os
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
|
||||
import git
|
||||
|
||||
from zuul.driver.git.gitmodel import EMPTY_GIT_REF
|
||||
from zuul.zk.event_queues import EventReceiverElection
|
||||
|
||||
|
||||
# This class may be used by any driver to implement git head polling.
|
||||
class GitWatcher(threading.Thread):
|
||||
log = logging.getLogger("zuul.connection.git.watcher")
|
||||
|
||||
def __init__(self, connection, baseurl, poll_delay, callback):
|
||||
def __init__(self, connection, baseurl, poll_delay, callback,
|
||||
election_name="watcher"):
|
||||
"""Watch for branch changes
|
||||
|
||||
Watch every project listed in the connection and call a
|
||||
|
@ -43,6 +44,8 @@ class GitWatcher(threading.Thread):
|
|||
:param function callback:
|
||||
A callback method to be called for each updated ref. The sole
|
||||
argument is a dictionary describing the update.
|
||||
:param str election_name:
|
||||
Name to use in the Zookeeper election of the watcher.
|
||||
"""
|
||||
threading.Thread.__init__(self)
|
||||
self.daemon = True
|
||||
|
@ -50,8 +53,13 @@ class GitWatcher(threading.Thread):
|
|||
self.baseurl = baseurl
|
||||
self.poll_delay = poll_delay
|
||||
self._stopped = False
|
||||
self._stop_event = threading.Event()
|
||||
self.projects_refs = {}
|
||||
self.callback = callback
|
||||
self.watcher_election = EventReceiverElection(
|
||||
connection.sched.zk_client,
|
||||
connection.connection_name,
|
||||
election_name)
|
||||
# This is used by the test framework
|
||||
self._event_count = 0
|
||||
self._pause = False
|
||||
|
@ -110,37 +118,44 @@ class GitWatcher(threading.Thread):
|
|||
events.append(event)
|
||||
return events
|
||||
|
||||
def _run(self):
|
||||
def _poll(self):
|
||||
self.log.debug("Walk through projects refs for connection: %s" %
|
||||
self.connection.connection_name)
|
||||
try:
|
||||
for project in self.connection.projects:
|
||||
refs = self.lsRemote(project)
|
||||
self.log.debug("Read %s refs for project %s",
|
||||
len(refs), project)
|
||||
if not self.projects_refs.get(project):
|
||||
# State for this project does not exist yet so add it.
|
||||
# No event will be triggered in this loop as
|
||||
# projects_refs['project'] and refs are equal
|
||||
self.projects_refs[project] = refs
|
||||
events = self.compareRefs(project, refs)
|
||||
for project in self.connection.projects:
|
||||
refs = self.lsRemote(project)
|
||||
self.log.debug("Read %s refs for project %s",
|
||||
len(refs), project)
|
||||
if not self.projects_refs.get(project):
|
||||
# State for this project does not exist yet so add it.
|
||||
# No event will be triggered in this loop as
|
||||
# projects_refs['project'] and refs are equal
|
||||
self.projects_refs[project] = refs
|
||||
# Send events to the scheduler
|
||||
for event in events:
|
||||
self.log.debug("Sending event: %s" % event)
|
||||
self.callback(event)
|
||||
self._event_count += 1
|
||||
except Exception as e:
|
||||
self.log.debug("Unexpected issue in _run loop: %s" % str(e))
|
||||
events = self.compareRefs(project, refs)
|
||||
self.projects_refs[project] = refs
|
||||
# Send events to the scheduler
|
||||
for event in events:
|
||||
self.log.debug("Sending event: %s" % event)
|
||||
self.callback(event)
|
||||
self._event_count += 1
|
||||
|
||||
def run(self):
|
||||
def _run(self):
|
||||
while not self._stopped:
|
||||
if not self._pause:
|
||||
self._run()
|
||||
self._poll()
|
||||
# Polling wait delay
|
||||
else:
|
||||
self.log.debug("Watcher is on pause")
|
||||
time.sleep(self.poll_delay)
|
||||
self._stop_event.wait(self.poll_delay)
|
||||
|
||||
def run(self):
|
||||
while not self._stopped:
|
||||
try:
|
||||
self.watcher_election.run(self._run)
|
||||
except Exception:
|
||||
self.log.exception("Unexpected issue in _run loop:")
|
||||
|
||||
def stop(self):
|
||||
self.log.debug("Stopping watcher")
|
||||
self._stopped = True
|
||||
self._stop_event.set()
|
||||
self.watcher_election.cancel()
|
||||
|
|
|
@ -25,6 +25,7 @@ from contextlib import suppress
|
|||
|
||||
from kazoo.exceptions import NoNodeError
|
||||
from kazoo.protocol.states import EventType
|
||||
from kazoo.recipe.election import Election
|
||||
|
||||
from zuul import model
|
||||
from zuul.lib.collections import DefaultKeyDict
|
||||
|
@ -581,3 +582,13 @@ class ConnectionEventQueue(ZooKeeperEventQueue):
|
|||
event = model.ConnectionEvent.fromDict(data)
|
||||
event.ack_ref = ack_ref
|
||||
yield event
|
||||
|
||||
|
||||
class EventReceiverElection(Election):
|
||||
"""Election for a singleton event receiver."""
|
||||
|
||||
def __init__(self, client, connection_name, receiver_name):
|
||||
self.election_root = "/".join(
|
||||
(CONNECTION_ROOT, connection_name, f"election-{receiver_name}")
|
||||
)
|
||||
super().__init__(client.client, self.election_root)
|
||||
|
|
Loading…
Reference in New Issue