Use a sensible SQLAlchemy session model
The existing db session strategy was inherited from a bunch of shell scripts that ran once in a single thread and exited. The surprising thing is that it even worked at all. This change replaces that "strategy" with one where each thread clearly begins a new session as a context manager and passes that around to functions that need the DB. A thread-local session is used for convenience and extra safety. This also adds a fake provider that will produce fake images and servers quickly without needing a real nova or jenkins. This was used to develop the database change. Also some minor logging changes and very brief developer docs. Change-Id: I45e6564cb061f81d79c47a31e17f5d85cd1d9306
This commit is contained in:
parent
35d66f0d77
commit
a5a78ef441
|
@ -0,0 +1,13 @@
|
||||||
|
Developer setup:
|
||||||
|
|
||||||
|
mysql -u root
|
||||||
|
|
||||||
|
mysql> create database nodepool;
|
||||||
|
mysql> GRANT ALL ON nodepool.* TO 'nodepool'@'localhost';
|
||||||
|
mysql> flush privileges;
|
||||||
|
|
||||||
|
nodepool -d -c tools/fake.yaml
|
||||||
|
|
||||||
|
After each run (the fake nova provider is only in-memory):
|
||||||
|
|
||||||
|
mysql> delete from snapshot_image; delete from node;
|
|
@ -0,0 +1,110 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Copyright 2013 OpenStack Foundation
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
import novaclient
|
||||||
|
|
||||||
|
|
||||||
|
class Dummy(object):
    """Generic attribute bag standing in for nova API result objects."""

    def __init__(self, **kw):
        # Promote every keyword argument to an instance attribute.
        for key, value in kw.items():
            setattr(self, key, value)

    def delete(self):
        # Delegate removal to the manager that owns this object.
        self.manager.delete(self)
|
||||||
|
|
||||||
|
|
||||||
|
class FakeList(object):
    """In-memory stand-in for a novaclient resource manager.

    Wraps a plain list and mimics the manager API used by nodepool:
    list/find/get lookups, delete, and asynchronous server creation.
    """

    def __init__(self, l):
        self._list = l

    def list(self):
        """Return every object currently tracked."""
        return self._list

    def find(self, name):
        """Return the first object whose name matches, else None."""
        for item in self._list:
            if item.name == name:
                return item

    def get(self, id):
        """Return the object with the given id, or raise NotFound."""
        for item in self._list:
            if item.id == id:
                return item
        raise novaclient.exceptions.NotFound(404)

    def _finish(self, obj, delay, status):
        # Runs in a helper thread: simulate the time nova takes to
        # move a resource into its final state.
        time.sleep(delay)
        obj.status = status

    def delete(self, obj):
        self._list.remove(obj)

    def create(self, **kw):
        """Create a fake server that turns ACTIVE after a short delay."""
        server = Dummy(id=uuid.uuid4().hex,
                       name=kw['name'],
                       status='BUILD',
                       addresses=dict(public=[dict(version=4,
                                                   addr='fake')]),
                       manager=self)
        self._list.append(server)
        # Flip the status asynchronously, like a real cloud would.
        worker = threading.Thread(target=self._finish,
                                  args=(server, 0.5, 'ACTIVE'))
        worker.start()
        return server

    def create_image(self, server, name):
        """Snapshot a server; return the id of the new fake image."""
        return self.api.images.create(name=name).id
|
||||||
|
|
||||||
|
|
||||||
|
class FakeHTTPClient(object):
    """Minimal stand-in for novaclient's HTTP transport layer."""

    def get(self, path):
        # Only the extension listing is ever requested by the code
        # under test; any other path falls through and returns None.
        if path != '/extensions':
            return None
        return None, dict(extensions=dict())
|
||||||
|
|
||||||
|
|
||||||
|
class FakeClient(object):
    """Fake novaclient exposing flavors, images and servers managers."""

    def __init__(self):
        self.client = FakeHTTPClient()
        self.flavors = FakeList([Dummy(id='f1', ram=8192)])
        self.images = FakeList([Dummy(id='i1', name='Fake Precise')])
        self.servers = FakeList([])
        # create_image() on the servers manager needs to reach the
        # images manager, so hand it a reference back to this client.
        self.servers.api = self
|
||||||
|
|
||||||
|
|
||||||
|
class FakeSSHClient(object):
    """SSH client stand-in that reports success for every operation."""

    def ssh(self, description, cmd):
        # Pretend the remote command ran cleanly.
        return True

    def scp(self, src, dest):
        # Pretend the file transfer succeeded.
        return True
|
||||||
|
|
||||||
|
|
||||||
|
class FakeJenkins(object):
    """In-memory Jenkins replacement that tracks registered nodes."""

    def __init__(self):
        # Maps node name -> the keyword arguments it was created with.
        self._nodes = {}

    def node_exists(self, name):
        """Return True when a node of this name has been created."""
        return name in self._nodes

    def create_node(self, name, **kw):
        """Register a node, remembering its configuration keywords."""
        self._nodes[name] = kw

    def delete_node(self, name):
        """Forget a previously created node."""
        del self._nodes[name]
|
||||||
|
|
||||||
|
|
||||||
|
FAKE_CLIENT = FakeClient()
|
|
@ -42,7 +42,7 @@ STATE_NAMES = {
|
||||||
|
|
||||||
from sqlalchemy import Table, Column, Integer, String, \
|
from sqlalchemy import Table, Column, Integer, String, \
|
||||||
MetaData, create_engine
|
MetaData, create_engine
|
||||||
from sqlalchemy.orm import mapper
|
from sqlalchemy.orm import scoped_session, mapper
|
||||||
from sqlalchemy.orm.session import Session, sessionmaker
|
from sqlalchemy.orm.session import Session, sessionmaker
|
||||||
|
|
||||||
metadata = MetaData()
|
metadata = MetaData()
|
||||||
|
@ -154,10 +154,27 @@ mapper(SnapshotImage, snapshot_image_table,
|
||||||
|
|
||||||
class NodeDatabase(object):
|
class NodeDatabase(object):
|
||||||
def __init__(self, dburi):
|
def __init__(self, dburi):
|
||||||
engine = create_engine(dburi, echo=False)
|
self.engine = create_engine(dburi, echo=False)
|
||||||
metadata.create_all(engine)
|
metadata.create_all(self.engine)
|
||||||
Session = sessionmaker(bind=engine, autoflush=True, autocommit=False)
|
self.session_factory = sessionmaker(bind=self.engine)
|
||||||
self.session = Session()
|
self.session = scoped_session(self.session_factory)
|
||||||
|
|
||||||
|
def getSession(self):
|
||||||
|
return NodeDatabaseSession(self.session)
|
||||||
|
|
||||||
|
|
||||||
|
class NodeDatabaseSession(object):
|
||||||
|
def __init__(self, session):
|
||||||
|
self.session = session
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, etype, value, tb):
|
||||||
|
if etype:
|
||||||
|
self.session().rollback()
|
||||||
|
else:
|
||||||
|
self.session().commit()
|
||||||
|
|
||||||
def print_state(self):
|
def print_state(self):
|
||||||
for provider_name in self.getProviders():
|
for provider_name in self.getProviders():
|
||||||
|
@ -182,40 +199,41 @@ class NodeDatabase(object):
|
||||||
node.state_time, node.ip)
|
node.state_time, node.ip)
|
||||||
|
|
||||||
def abort(self):
|
def abort(self):
|
||||||
self.session.rollback()
|
self.session().rollback()
|
||||||
|
|
||||||
def commit(self):
|
def commit(self):
|
||||||
self.session.commit()
|
self.session().commit()
|
||||||
|
|
||||||
def delete(self, obj):
|
def delete(self, obj):
|
||||||
self.session.delete(obj)
|
self.session().delete(obj)
|
||||||
|
|
||||||
def getProviders(self):
|
def getProviders(self):
|
||||||
return [
|
return [
|
||||||
x.provider_name for x in
|
x.provider_name for x in
|
||||||
self.session.query(SnapshotImage).distinct(
|
self.session().query(SnapshotImage).distinct(
|
||||||
snapshot_image_table.c.provider_name).all()]
|
snapshot_image_table.c.provider_name).all()]
|
||||||
|
|
||||||
def getImages(self, provider_name):
|
def getImages(self, provider_name):
|
||||||
return [
|
return [
|
||||||
x.image_name for x in
|
x.image_name for x in
|
||||||
self.session.query(SnapshotImage).filter(
|
self.session().query(SnapshotImage).filter(
|
||||||
snapshot_image_table.c.provider_name == provider_name
|
snapshot_image_table.c.provider_name == provider_name
|
||||||
).distinct(snapshot_image_table.c.image_name).all()]
|
).distinct(snapshot_image_table.c.image_name).all()]
|
||||||
|
|
||||||
def getSnapshotImages(self):
|
def getSnapshotImages(self):
|
||||||
return self.session.query(SnapshotImage).order_by(
|
return self.session().query(SnapshotImage).order_by(
|
||||||
snapshot_image_table.c.provider_name,
|
snapshot_image_table.c.provider_name,
|
||||||
snapshot_image_table.c.image_name).all()
|
snapshot_image_table.c.image_name).all()
|
||||||
|
|
||||||
def getSnapshotImage(self, id):
|
def getSnapshotImage(self, image_id):
|
||||||
images = self.session.query(SnapshotImage).filter_by(id=id).all()
|
images = self.session().query(SnapshotImage).filter_by(
|
||||||
|
id=image_id).all()
|
||||||
if not images:
|
if not images:
|
||||||
return None
|
return None
|
||||||
return images[0]
|
return images[0]
|
||||||
|
|
||||||
def getCurrentSnapshotImage(self, provider_name, image_name):
|
def getCurrentSnapshotImage(self, provider_name, image_name):
|
||||||
images = self.session.query(SnapshotImage).filter(
|
images = self.session().query(SnapshotImage).filter(
|
||||||
snapshot_image_table.c.provider_name == provider_name,
|
snapshot_image_table.c.provider_name == provider_name,
|
||||||
snapshot_image_table.c.image_name == image_name,
|
snapshot_image_table.c.image_name == image_name,
|
||||||
snapshot_image_table.c.state == READY).order_by(
|
snapshot_image_table.c.state == READY).order_by(
|
||||||
|
@ -226,13 +244,13 @@ class NodeDatabase(object):
|
||||||
|
|
||||||
def createSnapshotImage(self, *args, **kwargs):
|
def createSnapshotImage(self, *args, **kwargs):
|
||||||
new = SnapshotImage(*args, **kwargs)
|
new = SnapshotImage(*args, **kwargs)
|
||||||
self.session.add(new)
|
self.session().add(new)
|
||||||
self.session.commit()
|
self.commit()
|
||||||
return new
|
return new
|
||||||
|
|
||||||
def getNodes(self, provider_name=None, image_name=None, target_name=None,
|
def getNodes(self, provider_name=None, image_name=None, target_name=None,
|
||||||
state=None):
|
state=None):
|
||||||
exp = self.session.query(Node).order_by(
|
exp = self.session().query(Node).order_by(
|
||||||
node_table.c.provider_name,
|
node_table.c.provider_name,
|
||||||
node_table.c.image_name)
|
node_table.c.image_name)
|
||||||
if provider_name:
|
if provider_name:
|
||||||
|
@ -247,24 +265,24 @@ class NodeDatabase(object):
|
||||||
|
|
||||||
def createNode(self, *args, **kwargs):
|
def createNode(self, *args, **kwargs):
|
||||||
new = Node(*args, **kwargs)
|
new = Node(*args, **kwargs)
|
||||||
self.session.add(new)
|
self.session().add(new)
|
||||||
self.session.commit()
|
self.commit()
|
||||||
return new
|
return new
|
||||||
|
|
||||||
def getNode(self, id):
|
def getNode(self, id):
|
||||||
nodes = self.session.query(Node).filter_by(id=id).all()
|
nodes = self.session().query(Node).filter_by(id=id).all()
|
||||||
if not nodes:
|
if not nodes:
|
||||||
return None
|
return None
|
||||||
return nodes[0]
|
return nodes[0]
|
||||||
|
|
||||||
def getNodeByHostname(self, hostname):
|
def getNodeByHostname(self, hostname):
|
||||||
nodes = self.session.query(Node).filter_by(hostname=hostname).all()
|
nodes = self.session().query(Node).filter_by(hostname=hostname).all()
|
||||||
if not nodes:
|
if not nodes:
|
||||||
return None
|
return None
|
||||||
return nodes[0]
|
return nodes[0]
|
||||||
|
|
||||||
def getNodeByNodename(self, nodename):
|
def getNodeByNodename(self, nodename):
|
||||||
nodes = self.session.query(Node).filter_by(nodename=nodename).all()
|
nodes = self.session().query(Node).filter_by(nodename=nodename).all()
|
||||||
if not nodes:
|
if not nodes:
|
||||||
return None
|
return None
|
||||||
return nodes[0]
|
return nodes[0]
|
||||||
|
|
|
@ -49,22 +49,22 @@ class NodeCompleteThread(threading.Thread):
|
||||||
threading.Thread.__init__(self)
|
threading.Thread.__init__(self)
|
||||||
self.nodename = nodename
|
self.nodename = nodename
|
||||||
self.nodepool = nodepool
|
self.nodepool = nodepool
|
||||||
self.db = nodedb.NodeDatabase(self.nodepool.config.dburi)
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
try:
|
||||||
self.handleEvent()
|
with self.nodepool.db.getSession() as session:
|
||||||
|
self.handleEvent(session)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception handling event for %s:" %
|
self.log.exception("Exception handling event for %s:" %
|
||||||
self.nodename)
|
self.nodename)
|
||||||
|
|
||||||
def handleEvent(self):
|
def handleEvent(self, session):
|
||||||
node = self.db.getNodeByNodename(self.nodename)
|
node = session.getNodeByNodename(self.nodename)
|
||||||
if not node:
|
if not node:
|
||||||
self.log.debug("Unable to find node with nodename: %s" %
|
self.log.debug("Unable to find node with nodename: %s" %
|
||||||
self.nodename)
|
self.nodename)
|
||||||
return
|
return
|
||||||
self.nodepool.deleteNode(node)
|
self.nodepool.deleteNode(session, node)
|
||||||
|
|
||||||
|
|
||||||
class NodeUpdateListener(threading.Thread):
|
class NodeUpdateListener(threading.Thread):
|
||||||
|
@ -78,7 +78,6 @@ class NodeUpdateListener(threading.Thread):
|
||||||
self.socket.setsockopt(zmq.SUBSCRIBE, event_filter)
|
self.socket.setsockopt(zmq.SUBSCRIBE, event_filter)
|
||||||
self.socket.connect(addr)
|
self.socket.connect(addr)
|
||||||
self._stopped = False
|
self._stopped = False
|
||||||
self.db = nodedb.NodeDatabase(self.nodepool.config.dburi)
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while not self._stopped:
|
while not self._stopped:
|
||||||
|
@ -107,14 +106,15 @@ class NodeUpdateListener(threading.Thread):
|
||||||
topic)
|
topic)
|
||||||
|
|
||||||
def handleStartPhase(self, nodename):
|
def handleStartPhase(self, nodename):
|
||||||
node = self.db.getNodeByNodename(nodename)
|
with self.nodepool.db.getSession() as session:
|
||||||
|
node = session.getNodeByNodename(nodename)
|
||||||
if not node:
|
if not node:
|
||||||
self.log.debug("Unable to find node with nodename: %s" %
|
self.log.debug("Unable to find node with nodename: %s" %
|
||||||
nodename)
|
nodename)
|
||||||
return
|
return
|
||||||
self.log.info("Setting node id: %s to USED" % node.id)
|
self.log.info("Setting node id: %s to USED" % node.id)
|
||||||
node.state = nodedb.USED
|
node.state = nodedb.USED
|
||||||
self.nodepool.updateStats(node.provider_name)
|
self.nodepool.updateStats(session, node.provider_name)
|
||||||
|
|
||||||
def handleCompletePhase(self, nodename):
|
def handleCompletePhase(self, nodename):
|
||||||
t = NodeCompleteThread(self.nodepool, nodename)
|
t = NodeCompleteThread(self.nodepool, nodename)
|
||||||
|
@ -133,18 +133,24 @@ class NodeLauncher(threading.Thread):
|
||||||
self.nodepool = nodepool
|
self.nodepool = nodepool
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
try:
|
||||||
|
self._run()
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Exception in run method:")
|
||||||
|
|
||||||
|
def _run(self):
|
||||||
|
with self.nodepool.db.getSession() as session:
|
||||||
self.log.debug("Launching node id: %s" % self.node_id)
|
self.log.debug("Launching node id: %s" % self.node_id)
|
||||||
try:
|
try:
|
||||||
self.db = nodedb.NodeDatabase(self.nodepool.config.dburi)
|
self.node = session.getNode(self.node_id)
|
||||||
self.node = self.db.getNode(self.node_id)
|
|
||||||
self.client = utils.get_client(self.provider)
|
self.client = utils.get_client(self.provider)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception preparing to launch node id: %s:" %
|
self.log.exception("Exception preparing to launch node id: %s:"
|
||||||
self.node_id)
|
% self.node_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.launchNode()
|
self.launchNode(session)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception launching node id: %s:" %
|
self.log.exception("Exception launching node id: %s:" %
|
||||||
self.node_id)
|
self.node_id)
|
||||||
|
@ -155,7 +161,7 @@ class NodeLauncher(threading.Thread):
|
||||||
self.node_id)
|
self.node_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
def launchNode(self):
|
def launchNode(self, session):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
hostname = '%s-%s-%s.slave.openstack.org' % (
|
hostname = '%s-%s-%s.slave.openstack.org' % (
|
||||||
|
@ -165,7 +171,7 @@ class NodeLauncher(threading.Thread):
|
||||||
self.node.target_name = self.target.name
|
self.node.target_name = self.target.name
|
||||||
|
|
||||||
flavor = utils.get_flavor(self.client, self.image.min_ram)
|
flavor = utils.get_flavor(self.client, self.image.min_ram)
|
||||||
snap_image = self.db.getCurrentSnapshotImage(
|
snap_image = session.getCurrentSnapshotImage(
|
||||||
self.provider.name, self.image.name)
|
self.provider.name, self.image.name)
|
||||||
if not snap_image:
|
if not snap_image:
|
||||||
raise Exception("Unable to find current snapshot image %s in %s" %
|
raise Exception("Unable to find current snapshot image %s in %s" %
|
||||||
|
@ -179,7 +185,7 @@ class NodeLauncher(threading.Thread):
|
||||||
server, key = utils.create_server(self.client, hostname,
|
server, key = utils.create_server(self.client, hostname,
|
||||||
remote_snap_image, flavor)
|
remote_snap_image, flavor)
|
||||||
self.node.external_id = server.id
|
self.node.external_id = server.id
|
||||||
self.db.commit()
|
session.commit()
|
||||||
|
|
||||||
self.log.debug("Waiting for server %s for node id: %s" %
|
self.log.debug("Waiting for server %s for node id: %s" %
|
||||||
(server.id, self.node.id))
|
(server.id, self.node.id))
|
||||||
|
@ -213,7 +219,7 @@ class NodeLauncher(threading.Thread):
|
||||||
# Jenkins might immediately use the node before we've updated
|
# Jenkins might immediately use the node before we've updated
|
||||||
# the state:
|
# the state:
|
||||||
self.node.state = nodedb.READY
|
self.node.state = nodedb.READY
|
||||||
self.nodepool.updateStats(self.provider.name)
|
self.nodepool.updateStats(session, self.provider.name)
|
||||||
self.log.info("Node id: %s is ready" % self.node.id)
|
self.log.info("Node id: %s is ready" % self.node.id)
|
||||||
|
|
||||||
if self.target.jenkins_url:
|
if self.target.jenkins_url:
|
||||||
|
@ -222,7 +228,7 @@ class NodeLauncher(threading.Thread):
|
||||||
self.log.info("Node id: %s added to jenkins" % self.node.id)
|
self.log.info("Node id: %s added to jenkins" % self.node.id)
|
||||||
|
|
||||||
def createJenkinsNode(self):
|
def createJenkinsNode(self):
|
||||||
jenkins = myjenkins.Jenkins(self.target.jenkins_url,
|
jenkins = utils.get_jenkins(self.target.jenkins_url,
|
||||||
self.target.jenkins_user,
|
self.target.jenkins_user,
|
||||||
self.target.jenkins_apikey)
|
self.target.jenkins_apikey)
|
||||||
node_desc = 'Dynamic single use %s node' % self.image.name
|
node_desc = 'Dynamic single use %s node' % self.image.name
|
||||||
|
@ -267,19 +273,27 @@ class ImageUpdater(threading.Thread):
|
||||||
self.scriptdir = self.nodepool.config.scriptdir
|
self.scriptdir = self.nodepool.config.scriptdir
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
try:
|
||||||
|
self._run()
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Exception in run method:")
|
||||||
|
|
||||||
|
def _run(self):
|
||||||
|
with self.nodepool.db.getSession() as session:
|
||||||
self.log.debug("Updating image %s in %s " % (self.image.name,
|
self.log.debug("Updating image %s in %s " % (self.image.name,
|
||||||
self.provider.name))
|
self.provider.name))
|
||||||
try:
|
try:
|
||||||
self.db = nodedb.NodeDatabase(self.nodepool.config.dburi)
|
self.snap_image = session.getSnapshotImage(
|
||||||
self.snap_image = self.db.getSnapshotImage(self.snap_image_id)
|
self.snap_image_id)
|
||||||
self.client = utils.get_client(self.provider)
|
self.client = utils.get_client(self.provider)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception preparing to update image %s in %s:"
|
self.log.exception("Exception preparing to update image %s "
|
||||||
% (self.image.name, self.provider.name))
|
"in %s:" % (self.image.name,
|
||||||
|
self.provider.name))
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.updateImage()
|
self.updateImage(session)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception updating image %s in %s:" %
|
self.log.exception("Exception updating image %s in %s:" %
|
||||||
(self.image.name, self.provider.name))
|
(self.image.name, self.provider.name))
|
||||||
|
@ -291,7 +305,7 @@ class ImageUpdater(threading.Thread):
|
||||||
self.snap_image.id)
|
self.snap_image.id)
|
||||||
return
|
return
|
||||||
|
|
||||||
def updateImage(self):
|
def updateImage(self, session):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
timestamp = int(start_time)
|
timestamp = int(start_time)
|
||||||
|
|
||||||
|
@ -308,7 +322,7 @@ class ImageUpdater(threading.Thread):
|
||||||
self.snap_image.hostname = hostname
|
self.snap_image.hostname = hostname
|
||||||
self.snap_image.version = timestamp
|
self.snap_image.version = timestamp
|
||||||
self.snap_image.server_external_id = server.id
|
self.snap_image.server_external_id = server.id
|
||||||
self.db.commit()
|
session.commit()
|
||||||
|
|
||||||
self.log.debug("Image id: %s waiting for server %s" %
|
self.log.debug("Image id: %s waiting for server %s" %
|
||||||
(self.snap_image.id, server.id))
|
(self.snap_image.id, server.id))
|
||||||
|
@ -322,7 +336,7 @@ class ImageUpdater(threading.Thread):
|
||||||
|
|
||||||
image = utils.create_image(self.client, server, hostname)
|
image = utils.create_image(self.client, server, hostname)
|
||||||
self.snap_image.external_id = image.id
|
self.snap_image.external_id = image.id
|
||||||
self.db.commit()
|
session.commit()
|
||||||
self.log.debug("Image id: %s building image %s" %
|
self.log.debug("Image id: %s building image %s" %
|
||||||
(self.snap_image.id, image.id))
|
(self.snap_image.id, image.id))
|
||||||
# It can take a _very_ long time for Rackspace 1.0 to save an image
|
# It can take a _very_ long time for Rackspace 1.0 to save an image
|
||||||
|
@ -339,6 +353,7 @@ class ImageUpdater(threading.Thread):
|
||||||
statsd.incr(key)
|
statsd.incr(key)
|
||||||
|
|
||||||
self.snap_image.state = nodedb.READY
|
self.snap_image.state = nodedb.READY
|
||||||
|
session.commit()
|
||||||
self.log.info("Image %s in %s is ready" % (hostname,
|
self.log.info("Image %s in %s is ready" % (hostname,
|
||||||
self.provider.name))
|
self.provider.name))
|
||||||
|
|
||||||
|
@ -426,6 +441,7 @@ class NodePool(threading.Thread):
|
||||||
self.zmq_context = None
|
self.zmq_context = None
|
||||||
self.zmq_listeners = {}
|
self.zmq_listeners = {}
|
||||||
self.db = None
|
self.db = None
|
||||||
|
self.dburi = None
|
||||||
self.apsched = apscheduler.scheduler.Scheduler()
|
self.apsched = apscheduler.scheduler.Scheduler()
|
||||||
self.apsched.start()
|
self.apsched.start()
|
||||||
|
|
||||||
|
@ -452,7 +468,7 @@ class NodePool(threading.Thread):
|
||||||
self.apsched.unschedule_job(self.update_job)
|
self.apsched.unschedule_job(self.update_job)
|
||||||
parts = update_cron.split()
|
parts = update_cron.split()
|
||||||
minute, hour, dom, month, dow = parts[:5]
|
minute, hour, dom, month, dow = parts[:5]
|
||||||
self.apsched.add_cron_job(self.updateImages,
|
self.apsched.add_cron_job(self._doUpdateImages,
|
||||||
day=dom,
|
day=dom,
|
||||||
day_of_week=dow,
|
day_of_week=dow,
|
||||||
hour=hour,
|
hour=hour,
|
||||||
|
@ -463,7 +479,7 @@ class NodePool(threading.Thread):
|
||||||
self.apsched.unschedule_job(self.cleanup_job)
|
self.apsched.unschedule_job(self.cleanup_job)
|
||||||
parts = cleanup_cron.split()
|
parts = cleanup_cron.split()
|
||||||
minute, hour, dom, month, dow = parts[:5]
|
minute, hour, dom, month, dow = parts[:5]
|
||||||
self.apsched.add_cron_job(self.periodicCleanup,
|
self.apsched.add_cron_job(self._doPeriodicCleanup,
|
||||||
day=dom,
|
day=dom,
|
||||||
day_of_week=dow,
|
day_of_week=dow,
|
||||||
hour=hour,
|
hour=hour,
|
||||||
|
@ -524,6 +540,8 @@ class NodePool(threading.Thread):
|
||||||
i.providers[p.name] = p
|
i.providers[p.name] = p
|
||||||
p.min_ready = provider['min-ready']
|
p.min_ready = provider['min-ready']
|
||||||
self.config = newconfig
|
self.config = newconfig
|
||||||
|
if self.config.dburi != self.dburi:
|
||||||
|
self.dburi = self.config.dburi
|
||||||
self.db = nodedb.NodeDatabase(self.config.dburi)
|
self.db = nodedb.NodeDatabase(self.config.dburi)
|
||||||
self.startUpdateListeners(config['zmq-publishers'])
|
self.startUpdateListeners(config['zmq-publishers'])
|
||||||
|
|
||||||
|
@ -545,15 +563,15 @@ class NodePool(threading.Thread):
|
||||||
self.zmq_listeners[addr] = listener
|
self.zmq_listeners[addr] = listener
|
||||||
listener.start()
|
listener.start()
|
||||||
|
|
||||||
def getNumNeededNodes(self, target, provider, image):
|
def getNumNeededNodes(self, session, target, provider, image):
|
||||||
# Count machines that are ready and machines that are building,
|
# Count machines that are ready and machines that are building,
|
||||||
# so that if the provider is very slow, we aren't queueing up tons
|
# so that if the provider is very slow, we aren't queueing up tons
|
||||||
# of machines to be built.
|
# of machines to be built.
|
||||||
n_ready = len(self.db.getNodes(provider.name, image.name, target.name,
|
n_ready = len(session.getNodes(provider.name, image.name, target.name,
|
||||||
nodedb.READY))
|
nodedb.READY))
|
||||||
n_building = len(self.db.getNodes(provider.name, image.name,
|
n_building = len(session.getNodes(provider.name, image.name,
|
||||||
target.name, nodedb.BUILDING))
|
target.name, nodedb.BUILDING))
|
||||||
n_provider = len(self.db.getNodes(provider.name))
|
n_provider = len(session.getNodes(provider.name))
|
||||||
num_to_launch = provider.min_ready - (n_ready + n_building)
|
num_to_launch = provider.min_ready - (n_ready + n_building)
|
||||||
|
|
||||||
# Don't launch more than our provider max
|
# Don't launch more than our provider max
|
||||||
|
@ -567,30 +585,37 @@ class NodePool(threading.Thread):
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while not self._stopped:
|
while not self._stopped:
|
||||||
|
try:
|
||||||
self.loadConfig()
|
self.loadConfig()
|
||||||
self.checkForMissingImages()
|
with self.db.getSession() as session:
|
||||||
|
self._run(session)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Exception in main loop:")
|
||||||
|
time.sleep(WATERMARK_SLEEP)
|
||||||
|
|
||||||
|
def _run(self, session):
|
||||||
|
self.checkForMissingImages(session)
|
||||||
for target in self.config.targets.values():
|
for target in self.config.targets.values():
|
||||||
self.log.debug("Examining target: %s" % target.name)
|
self.log.debug("Examining target: %s" % target.name)
|
||||||
for image in target.images.values():
|
for image in target.images.values():
|
||||||
for provider in image.providers.values():
|
for provider in image.providers.values():
|
||||||
num_to_launch = self.getNumNeededNodes(
|
num_to_launch = self.getNumNeededNodes(
|
||||||
target, provider, image)
|
session, target, provider, image)
|
||||||
if num_to_launch:
|
if num_to_launch:
|
||||||
self.log.info("Need to launch %s %s nodes for "
|
self.log.info("Need to launch %s %s nodes for "
|
||||||
"%s on %s" %
|
"%s on %s" %
|
||||||
(num_to_launch, image.name,
|
(num_to_launch, image.name,
|
||||||
target.name, provider.name))
|
target.name, provider.name))
|
||||||
for i in range(num_to_launch):
|
for i in range(num_to_launch):
|
||||||
snap_image = self.db.getCurrentSnapshotImage(
|
snap_image = session.getCurrentSnapshotImage(
|
||||||
provider.name, image.name)
|
provider.name, image.name)
|
||||||
if not snap_image:
|
if not snap_image:
|
||||||
self.log.debug("No current image for %s on %s"
|
self.log.debug("No current image for %s on %s"
|
||||||
% (provider.name, image.name))
|
% (provider.name, image.name))
|
||||||
else:
|
else:
|
||||||
self.launchNode(provider, image, target)
|
self.launchNode(session, provider, image, target)
|
||||||
time.sleep(WATERMARK_SLEEP)
|
|
||||||
|
|
||||||
def checkForMissingImages(self):
|
def checkForMissingImages(self, session):
|
||||||
# If we are missing an image, run the image update function
|
# If we are missing an image, run the image update function
|
||||||
# outside of its schedule.
|
# outside of its schedule.
|
||||||
missing = False
|
missing = False
|
||||||
|
@ -598,7 +623,7 @@ class NodePool(threading.Thread):
|
||||||
for image in target.images.values():
|
for image in target.images.values():
|
||||||
for provider in image.providers.values():
|
for provider in image.providers.values():
|
||||||
found = False
|
found = False
|
||||||
for snap_image in self.db.getSnapshotImages():
|
for snap_image in session.getSnapshotImages():
|
||||||
if (snap_image.provider_name == provider.name and
|
if (snap_image.provider_name == provider.name and
|
||||||
snap_image.image_name == image.name and
|
snap_image.image_name == image.name and
|
||||||
snap_image.state in [nodedb.READY,
|
snap_image.state in [nodedb.READY,
|
||||||
|
@ -609,14 +634,21 @@ class NodePool(threading.Thread):
|
||||||
(image.name, provider.name))
|
(image.name, provider.name))
|
||||||
missing = True
|
missing = True
|
||||||
if missing:
|
if missing:
|
||||||
self.updateImages()
|
self.updateImages(session)
|
||||||
|
|
||||||
def updateImages(self):
|
def _doUpdateImages(self):
|
||||||
|
try:
|
||||||
|
with self.db.getSession() as session:
|
||||||
|
self.updateImages(session)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Exception in periodic image update:")
|
||||||
|
|
||||||
|
def updateImages(self, session):
|
||||||
# This function should be run periodically to create new snapshot
|
# This function should be run periodically to create new snapshot
|
||||||
# images.
|
# images.
|
||||||
for provider in self.config.providers.values():
|
for provider in self.config.providers.values():
|
||||||
for image in provider.images.values():
|
for image in provider.images.values():
|
||||||
snap_image = self.db.createSnapshotImage(
|
snap_image = session.createSnapshotImage(
|
||||||
provider_name=provider.name,
|
provider_name=provider.name,
|
||||||
image_name=image.name)
|
image_name=image.name)
|
||||||
t = ImageUpdater(self, provider, image, snap_image.id)
|
t = ImageUpdater(self, provider, image, snap_image.id)
|
||||||
|
@ -625,33 +657,33 @@ class NodePool(threading.Thread):
|
||||||
# Just to keep things clearer.
|
# Just to keep things clearer.
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
def launchNode(self, provider, image, target):
|
def launchNode(self, session, provider, image, target):
|
||||||
provider = self.config.providers[provider.name]
|
provider = self.config.providers[provider.name]
|
||||||
image = provider.images[image.name]
|
image = provider.images[image.name]
|
||||||
node = self.db.createNode(provider.name, image.name, target.name)
|
node = session.createNode(provider.name, image.name, target.name)
|
||||||
t = NodeLauncher(self, provider, image, target, node.id)
|
t = NodeLauncher(self, provider, image, target, node.id)
|
||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
def deleteNode(self, node):
|
def deleteNode(self, session, node):
|
||||||
# Delete a node
|
# Delete a node
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
node.state = nodedb.DELETE
|
node.state = nodedb.DELETE
|
||||||
self.updateStats(node.provider_name)
|
self.updateStats(session, node.provider_name)
|
||||||
provider = self.config.providers[node.provider_name]
|
provider = self.config.providers[node.provider_name]
|
||||||
target = self.config.targets[node.target_name]
|
target = self.config.targets[node.target_name]
|
||||||
client = utils.get_client(provider)
|
client = utils.get_client(provider)
|
||||||
|
|
||||||
if target.jenkins_url:
|
if target.jenkins_url:
|
||||||
jenkins = myjenkins.Jenkins(target.jenkins_url,
|
jenkins = utils.get_jenkins(target.jenkins_url,
|
||||||
target.jenkins_user,
|
target.jenkins_user,
|
||||||
target.jenkins_apikey)
|
target.jenkins_apikey)
|
||||||
jenkins_name = node.nodename
|
jenkins_name = node.nodename
|
||||||
if jenkins.node_exists(jenkins_name):
|
if jenkins.node_exists(jenkins_name):
|
||||||
jenkins.delete_node(jenkins_name)
|
jenkins.delete_node(jenkins_name)
|
||||||
self.log.info("Deleted jenkins node ID: %s" % node.id)
|
self.log.info("Deleted jenkins node id: %s" % node.id)
|
||||||
|
|
||||||
utils.delete_node(client, node)
|
utils.delete_node(client, node)
|
||||||
self.log.info("Deleted node ID: %s" % node.id)
|
self.log.info("Deleted node id: %s" % node.id)
|
||||||
|
|
||||||
if statsd:
|
if statsd:
|
||||||
dt = int((time.time() - start_time) * 1000)
|
dt = int((time.time() - start_time) * 1000)
|
||||||
|
@ -660,7 +692,7 @@ class NodePool(threading.Thread):
|
||||||
node.target_name)
|
node.target_name)
|
||||||
statsd.timing(key, dt)
|
statsd.timing(key, dt)
|
||||||
statsd.incr(key)
|
statsd.incr(key)
|
||||||
self.updateStats(node.provider_name)
|
self.updateStats(session, node.provider_name)
|
||||||
|
|
||||||
def deleteImage(self, snap_image):
|
def deleteImage(self, snap_image):
|
||||||
# Delete a node
|
# Delete a node
|
||||||
|
@ -669,16 +701,22 @@ class NodePool(threading.Thread):
|
||||||
client = utils.get_client(provider)
|
client = utils.get_client(provider)
|
||||||
|
|
||||||
utils.delete_image(client, snap_image)
|
utils.delete_image(client, snap_image)
|
||||||
self.log.info("Deleted image ID: %s" % snap_image.id)
|
self.log.info("Deleted image id: %s" % snap_image.id)
|
||||||
|
|
||||||
def periodicCleanup(self):
|
def _doPeriodicCleanup(self):
|
||||||
|
try:
|
||||||
|
with self.db.getSession() as session:
|
||||||
|
self.periodicCleanup(session)
|
||||||
|
except Exception:
|
||||||
|
self.log.exception("Exception in periodic cleanup:")
|
||||||
|
|
||||||
|
def periodicCleanup(self, session):
|
||||||
# This function should be run periodically to clean up any hosts
|
# This function should be run periodically to clean up any hosts
|
||||||
# that may have slipped through the cracks, as well as to remove
|
# that may have slipped through the cracks, as well as to remove
|
||||||
# old images.
|
# old images.
|
||||||
|
|
||||||
self.log.debug("Starting periodic cleanup")
|
self.log.debug("Starting periodic cleanup")
|
||||||
db = nodedb.NodeDatabase(self.config.dburi)
|
for node in session.getNodes():
|
||||||
for node in db.getNodes():
|
|
||||||
if node.state in [nodedb.READY, nodedb.HOLD]:
|
if node.state in [nodedb.READY, nodedb.HOLD]:
|
||||||
continue
|
continue
|
||||||
delete = False
|
delete = False
|
||||||
|
@ -694,12 +732,12 @@ class NodePool(threading.Thread):
|
||||||
delete = True
|
delete = True
|
||||||
if delete:
|
if delete:
|
||||||
try:
|
try:
|
||||||
self.deleteNode(node)
|
self.deleteNode(session, node)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.log.exception("Exception deleting node ID: "
|
self.log.exception("Exception deleting node id: "
|
||||||
"%s" % node.id)
|
"%s" % node.id)
|
||||||
|
|
||||||
for image in db.getSnapshotImages():
|
for image in session.getSnapshotImages():
|
||||||
# Normally, reap images that have sat in their current state
|
# Normally, reap images that have sat in their current state
|
||||||
# for 24 hours, unless the image is the current snapshot
|
# for 24 hours, unless the image is the current snapshot
|
||||||
delete = False
|
delete = False
|
||||||
|
@ -713,7 +751,7 @@ class NodePool(threading.Thread):
|
||||||
self.log.info("Deleting image id: %s which has no current "
|
self.log.info("Deleting image id: %s which has no current "
|
||||||
"base image" % image.id)
|
"base image" % image.id)
|
||||||
else:
|
else:
|
||||||
current = db.getCurrentSnapshotImage(image.provider_name,
|
current = session.getCurrentSnapshotImage(image.provider_name,
|
||||||
image.image_name)
|
image.image_name)
|
||||||
if (current and image != current and
|
if (current and image != current and
|
||||||
(time.time() - current.state_time) > KEEP_OLD_IMAGE):
|
(time.time() - current.state_time) > KEEP_OLD_IMAGE):
|
||||||
|
@ -729,11 +767,10 @@ class NodePool(threading.Thread):
|
||||||
image.id)
|
image.id)
|
||||||
self.log.debug("Finished periodic cleanup")
|
self.log.debug("Finished periodic cleanup")
|
||||||
|
|
||||||
def updateStats(self, provider_name):
|
def updateStats(self, session, provider_name):
|
||||||
if not statsd:
|
if not statsd:
|
||||||
return
|
return
|
||||||
# This may be called outside of the main thread.
|
# This may be called outside of the main thread.
|
||||||
db = nodedb.NodeDatabase(self.config.dburi)
|
|
||||||
provider = self.config.providers[provider_name]
|
provider = self.config.providers[provider_name]
|
||||||
|
|
||||||
states = {}
|
states = {}
|
||||||
|
@ -750,7 +787,7 @@ class NodePool(threading.Thread):
|
||||||
key = '%s.%s' % (base_key, state)
|
key = '%s.%s' % (base_key, state)
|
||||||
states[key] = 0
|
states[key] = 0
|
||||||
|
|
||||||
for node in db.getNodes():
|
for node in session.getNodes():
|
||||||
if node.state not in nodedb.STATE_NAMES:
|
if node.state not in nodedb.STATE_NAMES:
|
||||||
continue
|
continue
|
||||||
key = 'nodepool.target.%s.%s.%s.%s' % (
|
key = 'nodepool.target.%s.%s.%s.%s' % (
|
||||||
|
|
|
@ -21,9 +21,11 @@ import time
|
||||||
import paramiko
|
import paramiko
|
||||||
import socket
|
import socket
|
||||||
import logging
|
import logging
|
||||||
|
import myjenkins
|
||||||
from sshclient import SSHClient
|
from sshclient import SSHClient
|
||||||
|
|
||||||
import nodedb
|
import nodedb
|
||||||
|
import fakeprovider
|
||||||
|
|
||||||
log = logging.getLogger("nodepool.utils")
|
log = logging.getLogger("nodepool.utils")
|
||||||
|
|
||||||
|
@ -48,9 +50,18 @@ def get_client(provider):
|
||||||
kwargs['service_name'] = provider.service_name
|
kwargs['service_name'] = provider.service_name
|
||||||
if provider.region_name:
|
if provider.region_name:
|
||||||
kwargs['region_name'] = provider.region_name
|
kwargs['region_name'] = provider.region_name
|
||||||
|
if provider.auth_url == 'fake':
|
||||||
|
return fakeprovider.FAKE_CLIENT
|
||||||
client = novaclient.client.Client(*args, **kwargs)
|
client = novaclient.client.Client(*args, **kwargs)
|
||||||
return client
|
return client
|
||||||
|
|
||||||
|
|
||||||
|
def get_jenkins(url, user, apikey):
|
||||||
|
if apikey == 'fake':
|
||||||
|
return fakeprovider.FakeJenkins()
|
||||||
|
return myjenkins.Jenkins(url, user, apikey)
|
||||||
|
|
||||||
|
|
||||||
extension_cache = {}
|
extension_cache = {}
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,6 +161,8 @@ def wait_for_resource(wait_resource, timeout=3600):
|
||||||
|
|
||||||
|
|
||||||
def ssh_connect(ip, username, connect_kwargs={}, timeout=60):
|
def ssh_connect(ip, username, connect_kwargs={}, timeout=60):
|
||||||
|
if ip == 'fake':
|
||||||
|
return fakeprovider.FakeSSHClient()
|
||||||
# HPcloud may return errno 111 for about 30 seconds after adding the IP
|
# HPcloud may return errno 111 for about 30 seconds after adding the IP
|
||||||
for count in iterate_timeout(timeout, "ssh access"):
|
for count in iterate_timeout(timeout, "ssh access"):
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
script-dir: .
|
||||||
|
dburi: 'mysql://nodepool@localhost/nodepool'
|
||||||
|
|
||||||
|
cron:
|
||||||
|
cleanup: '*/1 * * * *'
|
||||||
|
update-image: '14 2 * * *'
|
||||||
|
|
||||||
|
zmq-publishers:
|
||||||
|
- tcp://localhost:8888
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: fake-provider
|
||||||
|
username: 'fake'
|
||||||
|
password: 'fake'
|
||||||
|
auth-url: 'fake'
|
||||||
|
project-id: 'fake'
|
||||||
|
max-servers: 96
|
||||||
|
images:
|
||||||
|
- name: nodepool-fake
|
||||||
|
base-image: 'Fake Precise'
|
||||||
|
min-ram: 8192
|
||||||
|
setup: prepare_node_devstack.sh
|
||||||
|
|
||||||
|
targets:
|
||||||
|
- name: fake-jenkins
|
||||||
|
jenkins:
|
||||||
|
url: https://jenkins.example.org/
|
||||||
|
user: fake
|
||||||
|
apikey: fake
|
||||||
|
images:
|
||||||
|
- name: nodepool-fake
|
||||||
|
providers:
|
||||||
|
- name: fake-provider
|
||||||
|
min-ready: 6
|
Loading…
Reference in New Issue