Add logging and retry label change
Sometimes the label change HTTP request can fail; in those cases, attempt it up to 3 times, 5 seconds apart, and if the final attempt also fails, log the error and re-raise it. Change-Id: Ifab68c91b89edfc5b441cbcabd1035efdc3a148e
This commit is contained in:
@@ -34,14 +34,19 @@ NODE_NAME = sys.argv[1]
|
||||
UPSTREAM_BUILD_URL=os.environ.get('UPSTREAM_BUILD_URL', '')
|
||||
UPSTREAM_JOB_NAME=os.environ.get('UPSTREAM_JOB_NAME', '')
|
||||
UPSTREAM_BRANCH=os.environ.get('UPSTREAM_BRANCH', '')
|
||||
|
||||
BUILD_URL=os.environ.get('BUILD_URL', '')
|
||||
|
||||
def main():
|
||||
db = vmdatabase.VMDatabase()
|
||||
|
||||
machine = db.getMachineByJenkinsName(NODE_NAME)
|
||||
if machine.state != vmdatabase.HOLD:
|
||||
utils.log.debug("Set deleted ID: %s old state: %s build: %s" % (
|
||||
machine.id, machine.state, BUILD_URL))
|
||||
machine.state = vmdatabase.DELETE
|
||||
else:
|
||||
utils.log.debug("Hold ID: %s old state: %s build: %s" % (
|
||||
machine.id, machine.state, BUILD_URL))
|
||||
|
||||
try:
|
||||
utils.update_stats(machine.base_image.provider)
|
||||
|
||||
@@ -35,8 +35,10 @@ NODE_NAME = sys.argv[1]
|
||||
DEVSTACK_GATE_SECURE_CONFIG = os.environ.get('DEVSTACK_GATE_SECURE_CONFIG',
|
||||
os.path.expanduser(
|
||||
'~/devstack-gate-secure.conf'))
|
||||
SKIP_DEVSTACK_GATE_JENKINS = os.environ.get('SKIP_DEVSTACK_GATE_JENKINS', None)
|
||||
BUILD_URL=os.environ.get('BUILD_URL', '')
|
||||
|
||||
LABEL_RE = re.compile(r'<label>.*</label>')
|
||||
LABEL_RE = re.compile(r'<label>(.*)</label>')
|
||||
|
||||
|
||||
def main():
|
||||
@@ -45,19 +47,39 @@ def main():
|
||||
config = ConfigParser.ConfigParser()
|
||||
config.read(DEVSTACK_GATE_SECURE_CONFIG)
|
||||
|
||||
jenkins = myjenkins.Jenkins(config.get('jenkins', 'server'),
|
||||
config.get('jenkins', 'user'),
|
||||
config.get('jenkins', 'apikey'))
|
||||
jenkins.get_info()
|
||||
if not SKIP_DEVSTACK_GATE_JENKINS:
|
||||
jenkins = myjenkins.Jenkins(config.get('jenkins', 'server'),
|
||||
config.get('jenkins', 'user'),
|
||||
config.get('jenkins', 'apikey'))
|
||||
jenkins.get_info()
|
||||
else:
|
||||
jenkins = None
|
||||
|
||||
machine = db.getMachineByJenkinsName(NODE_NAME)
|
||||
utils.log.debug("Used ID: %s old state: %s build:%s" % (
|
||||
machine.id, machine.state, BUILD_URL))
|
||||
|
||||
machine.state = vmdatabase.USED
|
||||
|
||||
if machine.jenkins_name:
|
||||
if jenkins.node_exists(machine.jenkins_name):
|
||||
config = jenkins.get_node_config(machine.jenkins_name)
|
||||
config = LABEL_RE.sub('<label>devstack-used</label>', config)
|
||||
jenkins.reconfig_node(machine.jenkins_name, config)
|
||||
if jenkins:
|
||||
if machine.jenkins_name:
|
||||
if jenkins.node_exists(machine.jenkins_name):
|
||||
config = jenkins.get_node_config(machine.jenkins_name)
|
||||
old = None
|
||||
m = LABEL_RE.search(config)
|
||||
if m:
|
||||
old = m.group(1)
|
||||
config = LABEL_RE.sub('<label>devstack-used</label>', config)
|
||||
for i in range(3):
|
||||
try:
|
||||
jenkins.reconfig_node(machine.jenkins_name, config)
|
||||
except:
|
||||
if i==2:
|
||||
utils.log.exception("Unable to relabel ID: %s" % machine.id)
|
||||
raise
|
||||
time.sleep(5)
|
||||
utils.log.debug("Relabeled ID: %s old label: %s new label: %s" % (
|
||||
machine.id, old, 'devstack-used'))
|
||||
|
||||
utils.update_stats(machine.base_image.provider)
|
||||
|
||||
|
||||
@@ -81,6 +81,8 @@ def launch_node(client, snap_image, image, flavor, last_name):
|
||||
print " id: %s" % (server.id)
|
||||
print " name: %s" % (name)
|
||||
print
|
||||
utils.log.debug("Launching ID: %s name: %s provider ID: %s" %
|
||||
(machine.id, name, server.id))
|
||||
return server, machine
|
||||
|
||||
|
||||
@@ -137,6 +139,7 @@ def check_machine(jenkins, client, machine, error_counts):
|
||||
create_jenkins_node(jenkins, machine)
|
||||
print "Machine %s is ready" % machine.id
|
||||
machine.state = vmdatabase.READY
|
||||
utils.log.debug("Online ID: %s" % machine.id)
|
||||
return
|
||||
elif not server.status.startswith('BUILD'):
|
||||
count = error_counts.get(machine.id, 0)
|
||||
@@ -220,6 +223,7 @@ def main():
|
||||
except:
|
||||
traceback.print_exc()
|
||||
print "Abandoning machine %s" % machine.id
|
||||
utils.log.exception("Abandoning ID: %s" % machine.id)
|
||||
machine.state = vmdatabase.ERROR
|
||||
error = True
|
||||
db.commit()
|
||||
|
||||
@@ -63,12 +63,14 @@ def delete_machine(jenkins, client, machine):
|
||||
# If we have deleted a server, don't believe it. Instead, wait for
|
||||
# the next run of the script and only if the server doesn't exist,
|
||||
# delete it from Jenkins and the DB.
|
||||
utils.log.debug("Delete ID: %s" % machine.id)
|
||||
return
|
||||
|
||||
if jenkins:
|
||||
if machine.jenkins_name:
|
||||
if jenkins.node_exists(machine.jenkins_name):
|
||||
jenkins.delete_node(machine.jenkins_name)
|
||||
utils.log.debug("Delete jenkins node ID: %s" % machine.id)
|
||||
|
||||
machine.delete()
|
||||
|
||||
|
||||
8
utils.py
8
utils.py
@@ -28,9 +28,17 @@ import paramiko
|
||||
import socket
|
||||
from sshclient import SSHClient
|
||||
from statsd import statsd
|
||||
import logging
|
||||
import logging.handlers
|
||||
|
||||
import vmdatabase
|
||||
|
||||
log = logging.getLogger('devstack-gate')
|
||||
log.setLevel(logging.DEBUG)
|
||||
handler = logging.handlers.SysLogHandler(address = '/dev/log')
|
||||
handler.setFormatter(logging.Formatter("devstack-gate: %(message)s"))
|
||||
log.addHandler(handler)
|
||||
|
||||
def iterate_timeout(max_seconds, purpose):
|
||||
start = time.time()
|
||||
count = 0
|
||||
|
||||
Reference in New Issue
Block a user