Add logging and retry label change

Sometimes the label change HTTP request can fail; in those cases,
try 3 times, 5 seconds apart, and log the error if the final attempt fails.

Change-Id: Ifab68c91b89edfc5b441cbcabd1035efdc3a148e
This commit is contained in:
James E. Blair
2013-07-10 23:15:25 +00:00
parent 34191892fc
commit cb9789cf26
5 changed files with 52 additions and 11 deletions

View File

@@ -34,14 +34,19 @@ NODE_NAME = sys.argv[1]
UPSTREAM_BUILD_URL=os.environ.get('UPSTREAM_BUILD_URL', '')
UPSTREAM_JOB_NAME=os.environ.get('UPSTREAM_JOB_NAME', '')
UPSTREAM_BRANCH=os.environ.get('UPSTREAM_BRANCH', '')
BUILD_URL=os.environ.get('BUILD_URL', '')
def main():
db = vmdatabase.VMDatabase()
machine = db.getMachineByJenkinsName(NODE_NAME)
if machine.state != vmdatabase.HOLD:
utils.log.debug("Set deleted ID: %s old state: %s build: %s" % (
machine.id, machine.state, BUILD_URL))
machine.state = vmdatabase.DELETE
else:
utils.log.debug("Hold ID: %s old state: %s build: %s" % (
machine.id, machine.state, BUILD_URL))
try:
utils.update_stats(machine.base_image.provider)

View File

@@ -35,8 +35,10 @@ NODE_NAME = sys.argv[1]
DEVSTACK_GATE_SECURE_CONFIG = os.environ.get('DEVSTACK_GATE_SECURE_CONFIG',
os.path.expanduser(
'~/devstack-gate-secure.conf'))
SKIP_DEVSTACK_GATE_JENKINS = os.environ.get('SKIP_DEVSTACK_GATE_JENKINS', None)
BUILD_URL=os.environ.get('BUILD_URL', '')
LABEL_RE = re.compile(r'<label>.*</label>')
LABEL_RE = re.compile(r'<label>(.*)</label>')
def main():
@@ -45,19 +47,39 @@ def main():
config = ConfigParser.ConfigParser()
config.read(DEVSTACK_GATE_SECURE_CONFIG)
jenkins = myjenkins.Jenkins(config.get('jenkins', 'server'),
config.get('jenkins', 'user'),
config.get('jenkins', 'apikey'))
jenkins.get_info()
if not SKIP_DEVSTACK_GATE_JENKINS:
jenkins = myjenkins.Jenkins(config.get('jenkins', 'server'),
config.get('jenkins', 'user'),
config.get('jenkins', 'apikey'))
jenkins.get_info()
else:
jenkins = None
machine = db.getMachineByJenkinsName(NODE_NAME)
utils.log.debug("Used ID: %s old state: %s build:%s" % (
machine.id, machine.state, BUILD_URL))
machine.state = vmdatabase.USED
if machine.jenkins_name:
if jenkins.node_exists(machine.jenkins_name):
config = jenkins.get_node_config(machine.jenkins_name)
config = LABEL_RE.sub('<label>devstack-used</label>', config)
jenkins.reconfig_node(machine.jenkins_name, config)
if jenkins:
if machine.jenkins_name:
if jenkins.node_exists(machine.jenkins_name):
config = jenkins.get_node_config(machine.jenkins_name)
old = None
m = LABEL_RE.search(config)
if m:
old = m.group(1)
config = LABEL_RE.sub('<label>devstack-used</label>', config)
for i in range(3):
try:
jenkins.reconfig_node(machine.jenkins_name, config)
except:
if i==2:
utils.log.exception("Unable to relabel ID: %s" % machine.id)
raise
time.sleep(5)
utils.log.debug("Relabeled ID: %s old label: %s new label: %s" % (
machine.id, old, 'devstack-used'))
utils.update_stats(machine.base_image.provider)

View File

@@ -81,6 +81,8 @@ def launch_node(client, snap_image, image, flavor, last_name):
print " id: %s" % (server.id)
print " name: %s" % (name)
print
utils.log.debug("Launching ID: %s name: %s provider ID: %s" %
(machine.id, name, server.id))
return server, machine
@@ -137,6 +139,7 @@ def check_machine(jenkins, client, machine, error_counts):
create_jenkins_node(jenkins, machine)
print "Machine %s is ready" % machine.id
machine.state = vmdatabase.READY
utils.log.debug("Online ID: %s" % machine.id)
return
elif not server.status.startswith('BUILD'):
count = error_counts.get(machine.id, 0)
@@ -220,6 +223,7 @@ def main():
except:
traceback.print_exc()
print "Abandoning machine %s" % machine.id
utils.log.exception("Abandoning ID: %s" % machine.id)
machine.state = vmdatabase.ERROR
error = True
db.commit()

View File

@@ -63,12 +63,14 @@ def delete_machine(jenkins, client, machine):
# If we have deleted a server, don't believe it. Instead, wait for
# the next run of the script and only if the server doesn't exist,
# delete it from Jenkins and the DB.
utils.log.debug("Delete ID: %s" % machine.id)
return
if jenkins:
if machine.jenkins_name:
if jenkins.node_exists(machine.jenkins_name):
jenkins.delete_node(machine.jenkins_name)
utils.log.debug("Delete jenkins node ID: %s" % machine.id)
machine.delete()

View File

@@ -28,9 +28,17 @@ import paramiko
import socket
from sshclient import SSHClient
from statsd import statsd
import logging
import logging.handlers
import vmdatabase
log = logging.getLogger('devstack-gate')
log.setLevel(logging.DEBUG)
handler = logging.handlers.SysLogHandler(address = '/dev/log')
handler.setFormatter(logging.Formatter("devstack-gate: %(message)s"))
log.addHandler(handler)
def iterate_timeout(max_seconds, purpose):
start = time.time()
count = 0