Keep a pool of machines ready for devstack.

Change-Id: Ic7b07747979f955b7fb8fa59c396cbafdaf11b73
This commit is contained in:
James E. Blair
2011-12-23 10:03:50 -08:00
parent a31715396b
commit 9226e2f535
7 changed files with 210 additions and 88 deletions

View File

@@ -33,13 +33,9 @@ CLOUD_SERVERS_DRIVER = os.environ.get('CLOUD_SERVERS_DRIVER','rackspace')
# Provider credentials are read from the environment; indexing os.environ
# raises KeyError when a variable is missing.
CLOUD_SERVERS_USERNAME = os.environ['CLOUD_SERVERS_USERNAME']
CLOUD_SERVERS_API_KEY = os.environ['CLOUD_SERVERS_API_KEY']
CHANGE = os.environ['GERRIT_CHANGE_NUMBER']
PATCH = os.environ['GERRIT_PATCHSET_NUMBER']
BUILD = os.environ['BUILD_NUMBER']
# The node to delete is identified by the first command-line argument.
node_uuid = sys.argv[1]
db = vmdatabase.VMDatabase()
# NOTE(review): `machine` is assigned twice below.  The three-argument
# lookup (and CHANGE/PATCH/BUILD above) looks like the removed side of this
# diff and the uuid lookup its replacement -- confirm.
machine = db.getMachine(CHANGE, PATCH, BUILD)
node_name = machine['name']
machine = db.getMachine(node_uuid)
if CLOUD_SERVERS_DRIVER == 'rackspace':
Driver = get_driver(Provider.RACKSPACE)
@@ -47,4 +43,4 @@ if CLOUD_SERVERS_DRIVER == 'rackspace':
node = [n for n in conn.list_nodes() if n.id==str(machine['id'])][0]
node.destroy()
db.delMachine(machine['id'])
db.delMachine(node_uuid)

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python
# Fetch a ready VM for use by devstack.
# Copyright (C) 2011 OpenStack LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
import vmdatabase
# Claim one ready machine from the pool and report it on stdout as
# VAR=value lines (each followed by an extra blank line).
pool = vmdatabase.VMDatabase()
machine = pool.getMachineForUse()
if not machine:
    # Nothing could be claimed: fail loudly instead of printing empty values.
    raise Exception("No ready nodes")
print("NODE_IP_ADDR=%s\n" % machine['ip'])
print("NODE_UUID=%s\n" % machine['uuid'])

View File

@@ -62,31 +62,29 @@ do
cd $WORKSPACE
done
# Acquire a test node, copy the gate script and the workspace onto it, run
# the gate script there, then delete the node or exit with the test status.
# NOTE(review): several commands below appear twice, once with $ipAddr / no
# argument and once with $NODE_IP_ADDR / $NODE_UUID.  These look like the
# removed and added sides of the diff shown together -- confirm which
# variant is live before editing.
python $CI_SCRIPT_DIR/devstack-vm-launch.py || exit $?
. $HOSTNAME.node.sh
rm $HOSTNAME.node.sh
# The command substitution's output is evaluated in this shell; the
# variables NODE_IP_ADDR and NODE_UUID used below are expected to come
# from it.
# NOTE(review): `$?` after eval reflects the evaluated text, not the python
# process, so a failing fetch that prints nothing may not trigger the
# `exit` -- confirm.
eval `python $CI_SCRIPT_DIR/devstack-vm-fetch.py` || exit $?
# Copy the gate script into the remote login directory.
scp -C $CI_SCRIPT_DIR/devstack-vm-gate-host.sh $ipAddr:
scp -C $CI_SCRIPT_DIR/devstack-vm-gate-host.sh $NODE_IP_ADDR:
RETVAL=$?
# NOTE(review): this branch deletes the host but does not exit, so the
# script continues with the next scp against a deleted node -- confirm
# whether an `exit $RETVAL` is intended.
if [ $RETVAL != 0 ]; then
echo "Deleting host"
python $CI_SCRIPT_DIR/devstack-vm-delete.py
python $CI_SCRIPT_DIR/devstack-vm-delete.py $NODE_UUID
fi
# Copy the whole workspace to ~/workspace on the node.
scp -C -q -r $WORKSPACE/ $ipAddr:workspace
scp -C -q -r $WORKSPACE/ $NODE_IP_ADDR:workspace
RETVAL=$?
# NOTE(review): same as above -- no exit after deleting the host.
if [ $RETVAL != 0 ]; then
echo "Deleting host"
python $CI_SCRIPT_DIR/devstack-vm-delete.py
python $CI_SCRIPT_DIR/devstack-vm-delete.py $NODE_UUID
fi
# Run the gate script on the node; its exit status decides the outcome.
ssh $ipAddr ./devstack-vm-gate-host.sh
ssh $NODE_IP_ADDR ./devstack-vm-gate-host.sh
RETVAL=$?
# On success (and unless ALWAYS_KEEP is set to something other than 0) the
# node is deleted; otherwise the node is left alone and the script exits
# with the test's status.
if [ $RETVAL = 0 ] && [ $ALWAYS_KEEP = 0 ]; then
echo "Deleting host"
python $CI_SCRIPT_DIR/devstack-vm-delete.py
python $CI_SCRIPT_DIR/devstack-vm-delete.py $NODE_UUID
else
#echo "Giving host to developer"
#python $CI_SCRIPT_DIR/devstack-vm-give.py
#python $CI_SCRIPT_DIR/devstack-vm-give.py $NODE_UUID
exit $RETVAL
fi

View File

@@ -28,12 +28,9 @@ import tempfile
import vmdatabase
# Gerrit/Jenkins build coordinates from the environment (KeyError if unset).
CHANGE = os.environ['GERRIT_CHANGE_NUMBER']
PATCH = os.environ['GERRIT_PATCHSET_NUMBER']
BUILD = os.environ['BUILD_NUMBER']
# The node being handed over is identified by the first command-line argument.
node_uuid = sys.argv[1]
db = vmdatabase.VMDatabase()
# NOTE(review): two consecutive assignments to `machine`; the three-argument
# call looks like the removed side of this diff and the uuid lookup its
# replacement -- confirm.
machine = db.getMachine(CHANGE, PATCH, BUILD)
machine = db.getMachine(node_uuid)
# Ask Gerrit (over SSH, port 29418) for the change's details as JSON.
# getstatusoutput returns an (exit status, combined output) pair.
stat, out = commands.getstatusoutput("ssh -p 29418 review.openstack.org gerrit query --format=JSON change:%s" % os.environ['GERRIT_CHANGE_NUMBER'])

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python
# Launch a VM for use by devstack.
# Make sure there are always a certain number of VMs launched and
# ready for use by devstack.
# Copyright (C) 2011 OpenStack LLC.
#
@@ -19,6 +20,7 @@
# limitations under the License.
from libcloud.base import NodeImage, NodeSize, NodeLocation
from libcloud.compute.types import NodeState
from libcloud.types import Provider
from libcloud.providers import get_driver
from libcloud.deployment import MultiStepDeployment, ScriptDeployment, SSHKeyDeployment
@@ -32,17 +34,29 @@ import vmdatabase
# Provider selection and credentials.  The driver defaults to 'rackspace';
# username and API key are mandatory (KeyError if unset); host and path are
# optional and default to None.
CLOUD_SERVERS_DRIVER = os.environ.get('CLOUD_SERVERS_DRIVER','rackspace')
CLOUD_SERVERS_USERNAME = os.environ['CLOUD_SERVERS_USERNAME']
CLOUD_SERVERS_API_KEY = os.environ['CLOUD_SERVERS_API_KEY']
CLOUD_SERVERS_HOST = os.environ.get('CLOUD_SERVERS_HOST', None)
CLOUD_SERVERS_PATH = os.environ.get('CLOUD_SERVERS_PATH', None)
# Images whose name starts with IMAGE_NAME are candidates for new nodes.
IMAGE_NAME = 'devstack-oneiric'
# NOTE(review): MIN_RAM is not used in the visible part of this file;
# presumably it selects the node size (in MB) -- confirm.
MIN_RAM = 1024
# NOTE(review): CHANGE/PATCH/BUILD and the per-build `node_name` below look
# like the removed side of this diff; the pooled launcher further down
# builds time-based names instead -- confirm.
CHANGE = os.environ['GERRIT_CHANGE_NUMBER']
PATCH = os.environ['GERRIT_PATCHSET_NUMBER']
BUILD = os.environ['BUILD_NUMBER']
# Target pool size: READY plus BUILDING machines are topped up to this count.
MIN_READY_MACHINES = 5
db = vmdatabase.VMDatabase()
node_name = 'devstack-%s-%s-%s.slave.openstack.org' % (CHANGE, PATCH, BUILD)
# Two separate passes over the machines table, one per state of interest.
ready_machines = [x for x in db.getMachines()
if x['state'] == vmdatabase.READY]
building_machines = [x for x in db.getMachines()
if x['state'] == vmdatabase.BUILDING]
# Count machines that are ready and machines that are building,
# so that if the provider is very slow, we aren't queueing up tons
# of machines to be built.
num_to_launch = MIN_READY_MACHINES - (len(ready_machines) +
len(building_machines))
print "%s ready, %s building, need to launch %s" % (len(ready_machines),
len(building_machines),
num_to_launch)
# The pool is already full (or over-full): nothing to do, exit successfully.
if num_to_launch <= 0:
sys.exit(0)
if CLOUD_SERVERS_DRIVER == 'rackspace':
Driver = get_driver(Provider.RACKSPACE)
@@ -55,22 +69,56 @@ if CLOUD_SERVERS_DRIVER == 'rackspace':
images = [img for img in conn.list_images()
if img.name.startswith(IMAGE_NAME)]
images.sort()
if not len(images):
raise Exception("No images found")
image = images[-1]
else:
raise Exception ("Driver not supported")
if CLOUD_SERVERS_DRIVER == 'rackspace':
node = conn.create_node(name=node_name, image=image, size=size)
# A private method, Tomaz Muraus says he's thinking of making it public
node = conn._wait_until_running(node=node, wait_period=3,
timeout=600)
print "Node ID:", node.id
print "Node IP:", node.public_ip[0]
db.addMachine(node.id, node_name, node.public_ip[0], CHANGE, PATCH, BUILD)
with open("%s.node.sh" % node_name,"w") as node_file:
node_file.write("ipAddr=%s\n" % node.public_ip[0])
node_file.write("nodeId=%s\n" % node.id)
# Launch `num_to_launch` new nodes.  Each one is recorded in the database
# (addMachine stores it in the BUILDING state) as soon as the provider
# returns from create_node.
last_name = ''
for i in range(num_to_launch):
    # Node names embed the current Unix time in whole seconds.  If the
    # candidate equals the name used for the previous node, wait a second
    # and try again so that two nodes never share a name.
    while True:
        node_name = 'devstack-%s.slave.openstack.org' % int(time.time())
        if node_name != last_name:
            break
        time.sleep(1)
    # Remember the name just chosen; without this assignment the comparison
    # above always succeeds and back-to-back launches can collide.
    last_name = node_name
    node = conn.create_node(name=node_name, image=image, size=size)
    # NOTE(review): `public_ip` (singular) is used here while the wait loop
    # below reads `public_ips`; both presumably exist in the libcloud
    # version in use -- confirm.
    db.addMachine(CLOUD_SERVERS_DRIVER, node.id, IMAGE_NAME,
                  node_name, node.public_ip[0], node.uuid)
    print("Started building node %s:" % node.id)
    print(" name: %s [%s]" % (node_name, node.public_ip[0]))
    print(" uuid: %s" % (node.uuid))
    print("")
# Wait for nodes
# TODO: The vmdatabase is (probably) ready, but this needs reworking to
# actually support multiple providers
start = time.time()
timeout = 600
# UUIDs of machines the provider cannot identify unambiguously; they are
# reported once and then skipped on later polls.
to_ignore = set()
while (time.time() - start) < timeout:
    building_machines = [x for x in db.getMachines()
                         if x['state'] == vmdatabase.BUILDING]
    if not building_machines:
        print("Finished")
        break
    # One provider listing per poll, matched against every building machine.
    provider_nodes = conn.list_nodes()
    print("Waiting on %s machines" % len(building_machines))
    for my_node in building_machines:
        if my_node['uuid'] in to_ignore:
            continue
        p_nodes = [x for x in provider_nodes if x.uuid == my_node['uuid']]
        if len(p_nodes) != 1:
            print("Incorrect number of nodes (%s) from provider matching UUID %s" % (len(p_nodes), my_node['uuid']))
            # Store the uuid (not the row) so the membership test above
            # actually matches on the next poll.
            # NOTE(review): an ignored machine stays BUILDING in the
            # database, so the loop keeps polling until the timeout.
            to_ignore.add(my_node['uuid'])
        else:
            p_node = p_nodes[0]
            if (p_node.public_ips and p_node.state == NodeState.RUNNING):
                print("Node %s is ready" % my_node['id'])
                db.setMachineState(my_node['uuid'], vmdatabase.READY)
            elif (p_node.public_ips and p_node.state in
                  [NodeState.UNKNOWN,
                   NodeState.REBOOTING,
                   NodeState.TERMINATED]):
                # NOTE(review): a failed node without any public IP is not
                # caught by this test -- confirm that is intended.
                print("Node %s is in error" % my_node['id'])
                db.setMachineState(my_node['uuid'], vmdatabase.ERROR)
    time.sleep(3)

View File

@@ -32,6 +32,7 @@ import vmdatabase
CLOUD_SERVERS_DRIVER = os.environ.get('CLOUD_SERVERS_DRIVER','rackspace')
CLOUD_SERVERS_USERNAME = os.environ['CLOUD_SERVERS_USERNAME']
CLOUD_SERVERS_API_KEY = os.environ['CLOUD_SERVERS_API_KEY']
MACHINE_LIFETIME = 24*60*60 # Amount of time after being used
db = vmdatabase.VMDatabase()
@@ -53,18 +54,21 @@ def delete(machine):
node = [n for n in conn.list_nodes() if n.id==str(machine['id'])]
if not node:
print ' Machine id %s not found' % machine['id']
db.delMachine(machine['id'])
db.delMachine(machine['uuid'])
return
node = node[0]
node.destroy()
db.delMachine(machine['id'])
db.delMachine(machine['uuid'])
# Walk every known machine and delete the ones that are due for reaping.
now = time.time()
for machine in db.getMachines():
# NOTE(review): two `if` headers follow.  The first (keyed on 'created')
# looks like the removed side of this diff and the state-based test its
# replacement -- confirm.
if REAP_ALL or (now-machine['created'] > 24*60*60):
# Normally, reap machines that have sat in their current state
# for 24 hours, unless that state is READY.
# NOTE(review): machines in the ERROR state also wait out MACHINE_LIFETIME
# before being deleted -- confirm that is intended.
if REAP_ALL or (machine['state']!=vmdatabase.READY and
now-machine['state_time'] > MACHINE_LIFETIME):
print 'Deleting', machine['name']
delete(machine)
print
print 'Known machines (end):'
for machine in db.getMachines():

View File

@@ -2,56 +2,105 @@ import sqlite3
import os
import time
# Machine lifecycle states, stored in the ``state`` column:
#   BUILDING - the cloud provider is still building the machine; we hold
#              an ID for it but it cannot be used yet.
#   READY    - the machine can be handed out for use.
#   USED     - the machine is in use, or was used and is finished.  The two
#              cases are deliberately not told apart: a machine is simply
#              deleted 24 hours after it enters this state.
#   ERROR    - something went wrong; the machine should just be deleted.
BUILDING, READY, USED, ERROR = range(1, 5)

# Columns of the ``machines`` table:
#   state       one of the state values above
#   state_time  time at which the machine entered that state
#   user        set when the machine is given to a user
#   id          identifier assigned by the cloud provider
#   name        machine name
#   ip          machine IP address
#   uuid        uuid from libcloud
#   provider    libcloud driver for this server
#   image       name of the image this server is based on
class VMDatabase(object):
# Open the SQLite database at `path`, creating the `machines` table first
# when the file does not exist yet.  The default path is computed once,
# when the function is defined, from "~/vm.db".
def __init__(self, path=os.path.expanduser("~/vm.db")):
# Set isolation_level = None, which means "autocommit" mode
# but more importantly lets you manage transactions manually
# without the isolation emulation getting in your way.
# Most of our writes can be autocommitted, and the one(s)
# that can't, we'll set up the transaction around the critical
# section.
# NOTE(review): the table is only created when the file is missing; an
# existing file with a different schema is opened as-is.
# NOTE(review): two table-creation sequences and two `self.conn`
# assignments follow.  The cursor-based one (with change_number /
# patch_number / build_number columns) looks like the removed side of this
# diff -- confirm.
if not os.path.exists(path):
conn = sqlite3.connect(path)
c = conn.cursor()
c.execute('''create table machines
(id int, name text, ip text, change_number, patch_number, build_number, created int, user text)''')
conn.commit()
c.close()
self.conn = sqlite3.connect(path)
conn = sqlite3.connect(path, isolation_level=None)
conn.execute("""create table machines
(provider text, id int, image text,
name text, ip text, uuid text,
state_time int, state int, user text)""")
# Drop the local reference to the setup connection; a fresh one is opened
# below for regular use.
del conn
self.conn = sqlite3.connect(path, isolation_level = None)
# This turns the returned rows into objects that are like lists
# and dicts at the same time:
self.conn.row_factory = sqlite3.Row
def addMachine(self, mid, name, ip, change, patch, build):
c = self.conn.cursor()
c.execute("insert into machines (id, name, ip, change_number, patch_number, build_number, created) values (?, ?, ?, ?, ?, ?, ?)",
(mid, name, ip, change, patch, build, int(time.time())))
self.conn.commit()
c.close()
def addMachine(self, provider, mid, image, name, ip, uuid):
    """Insert a new machine row in the BUILDING state.

    ``state_time`` is set to the current Unix time truncated to whole
    seconds; the ``user`` column is not written.
    """
    stamped_at = int(time.time())
    row = (provider, mid, image, name, ip, uuid, stamped_at, BUILDING)
    sql = ("insert into machines\n"
           "(provider, id, image, name, ip,\n"
           "uuid, state_time, state)\n"
           "values (?, ?, ?, ?, ?, ?, ?, ?)")
    self.conn.execute(sql, row)
def delMachine(self, mid):
c = self.conn.cursor()
c.execute("delete from machines where id=?", (mid,))
self.conn.commit()
c.close()
def delMachine(self, uuid):
    """Delete every row whose ``uuid`` column equals *uuid*.

    Nothing happens when no row matches.
    """
    params = (uuid,)
    self.conn.execute("delete from machines where uuid=?", params)
def setMachineUser(self, mid, user):
c = self.conn.cursor()
c.execute("update machines set user=? where id=?", (user, mid))
self.conn.commit()
c.close()
def setMachineUser(self, uuid, user):
    """Store *user* in the ``user`` column of the row matching *uuid*."""
    params = (user, uuid)
    self.conn.execute("update machines set user=? where uuid=?", params)
def setMachineState(self, uuid, state):
    """Move the machine identified by *uuid* into *state*.

    ``state_time`` is refreshed to the current Unix time (whole seconds)
    in the same statement.
    """
    changed_at = int(time.time())
    sql = ("update machines set state=?, state_time=?\n"
           "where uuid=?")
    self.conn.execute(sql, (state, changed_at, uuid))
# Return all rows of the `machines` table.
# NOTE(review): two bodies are shown.  The cursor/dict version ending in
# `return data` looks like the removed side of this diff, which would make
# the final `return` the live body -- confirm.  That final line returns the
# value of conn.execute() directly (an iterable of rows ordered by
# state_time), not a list.
def getMachines(self):
c = self.conn.cursor()
c.execute("select * from machines")
names = [col[0] for col in c.description]
data = [dict(zip(names, row)) for row in c]
c.close()
return data
return self.conn.execute("select * from machines order by state_time")
def getMachine(self, change, patch, build):
c = self.conn.cursor()
c.execute("select * from machines where change_number=? and patch_number=? and build_number=?", (change, patch, build))
names = [col[0] for col in c.description]
data = [row for row in c]
c.close()
return dict(zip(names, data[0]))
def getMachine(self, uuid):
    """Return the first row whose ``uuid`` column equals *uuid*.

    Returns None when no row matches.
    """
    cursor = self.conn.execute("select * from machines where uuid=?",
                               (uuid,))
    return cursor.fetchone()
def getMachineForUse(self):
    """Atomically find a machine that is ready for use, and update
    its state.

    The search and the state change run inside one exclusive
    transaction.  The first row returned by getMachines() whose state is
    READY is switched to USED.

    Returns the claimed row as it was read *before* the update (its
    'state' field still says READY), or None when no machine is READY.
    Any exception raised inside the critical section rolls the
    transaction back and is re-raised.
    """
    self.conn.execute("begin exclusive transaction")
    try:
        ret = None
        for m in self.getMachines():
            if m['state'] == READY:
                # setMachineState() matches on the uuid column, so the
                # machine must be identified by its uuid, not by the
                # provider id.
                self.setMachineState(m['uuid'], USED)
                ret = m
                break
    except Exception:
        # Do not leave the exclusive lock held if anything goes wrong.
        self.conn.execute("rollback")
        raise
    self.conn.execute("commit")
    return ret
# Ad-hoc smoke test, run only when this module is executed directly.
# NOTE(review): old and new test statements appear interleaved here.  Calls
# such as getMachine(88,2,1), delMachine(1) and setMachineUser(1, ...) do
# not match the uuid-based methods and look like the removed side of this
# diff -- confirm.
if __name__=='__main__':
db = VMDatabase()
db.addMachine(1, 'foo', '1.2.3.4', 88, 2, 1)
db.setMachineUser(1, 'jeblair')
# Uses a database file under /tmp rather than the default path.
db = VMDatabase("/tmp/vm.db")
# Register two machines and mark both READY so one can be claimed.
db.addMachine('rackspace', 1, 'devstack', 'foo', '1.2.3.4', 'uuid1')
db.setMachineState('uuid1', READY)
db.addMachine('rackspace', 2, 'devstack', 'foo2', '1.2.3.4', 'uuid2')
db.setMachineState('uuid2', READY)
m = db.getMachineForUse()
print 'got machine'
print m
db.setMachineUser(m['uuid'], 'jeblair')
print db.getMachines()
print db.getMachine(88,2,1)
db.delMachine(1)
print db.getMachine(1)
print 'waiting to delete'
time.sleep(2)
# Clean up both test rows.
db.delMachine('uuid1')
db.delMachine('uuid2')