Add ready-script and multi-node support

Write information about the node group to /etc/nodepool, along
with an ssh key generated specifically for the node group.

Add an optional script that is run on each node (and sub-node) for
a label right before a node is placed in the ready state.  This
script can use the data in /etc/nodepool to set up access between
the nodes in the group.

Change-Id: Id0771c62095cccf383229780d1c4ddcf0ab42c1b
This commit is contained in:
James E. Blair 2014-03-21 15:25:07 -07:00
parent ad7b9a849b
commit db5602a91e
4 changed files with 119 additions and 3 deletions

View File

@ -102,6 +102,7 @@ providers or images are used to create them). Example::
image: precise
subnodes: 2
min-ready: 2
ready-script: setup_multinode.sh
providers:
- name: provider1
@ -118,6 +119,28 @@ communicate directly with each other. In the example above, for each
Precise node added to the target system, two additional nodes will be
created and associated with it.
The script specified by `ready-script` (which is expected to be in
`/opt/nodepool-scripts` along with the setup script) can be used to
perform any last-minute changes to a node after it has been launched
but before it is put in the READY state to receive jobs. In
particular, it can read the files in /etc/nodepool to perform
multi-node-related setup.
Those files include:
**/etc/nodepool/role**
Either the string ``primary`` or ``sub`` indicating whether this
node is the primary (the node added to the target and which will run
the job), or a sub-node.
**/etc/nodepool/primary_node**
The IP address of the primary node.
**/etc/nodepool/sub_nodes**
The IP addresses of the sub nodes, one on each line.
**/etc/nodepool/id_rsa**
An OpenSSH private key generated specifically for this node group.
**/etc/nodepool/id_rsa.pub**
The corresponding public key.
providers
---------

View File

@ -14,10 +14,12 @@
# License for the specific language governing permissions and limitations
# under the License.
import uuid
import time
import threading
import StringIO
import novaclient
import threading
import time
import uuid
from jenkins import JenkinsException
@ -109,13 +111,38 @@ class FakeClient(object):
self.client.region_name = None
class FakeFile(StringIO.StringIO):
    """In-memory stand-in for a file handle returned by an SFTP client.

    Everything written is buffered.  When the file is closed, the path it
    was opened with and the buffered contents are echoed to stdout, and
    the underlying buffer is then closed.
    """

    def __init__(self, path):
        StringIO.StringIO.__init__(self)
        # Kept only so close() can report which path was "written".
        self.__path = path

    def close(self):
        """Print the path and the accumulated contents, then close."""
        # A single-argument parenthesised print produces the same output
        # as the Python 2 print statement and is also valid Python 3.
        print("Wrote to %s:" % self.__path)
        print(self.getvalue())
        StringIO.StringIO.close(self)
class FakeSFTPClient(object):
    """Minimal fake of an SFTP client: hands out FakeFile objects."""

    def open(self, path, mode='r', bufsize=-1):
        """Return a FakeFile for ``path``.

        ``mode`` and ``bufsize`` are accepted (with the same defaults as
        paramiko's ``SFTPClient.open``) so callers written against the
        real client work unchanged; both are ignored by the fake.
        """
        return FakeFile(path)

    def close(self):
        """Nothing to release for the fake session."""
        pass
class FakeSSHClient(object):
    """Fake SSH connection whose operations always report success."""

    def __init__(self):
        # Callers reach the SFTP layer through ``host.client``; the fake
        # plays both roles, so it simply points at itself.
        self.client = self

    def open_sftp(self):
        """Return a fresh fake SFTP session."""
        return FakeSFTPClient()

    def scp(self, src, dest):
        """Pretend to copy ``src`` to ``dest``; always returns True."""
        return True

    def ssh(self, description, cmd):
        """Pretend to run ``cmd``; always returns True."""
        return True
class FakeJenkins(object):
def __init__(self, user):

View File

@ -22,6 +22,7 @@ import gear
import json
import logging
import os.path
import paramiko
import re
import threading
import time
@ -377,6 +378,15 @@ class NodeLauncher(threading.Thread):
break
time.sleep(5)
nodelist = []
for subnode in self.node.subnodes:
nodelist.append(('sub', subnode))
nodelist.append(('primary', self.node))
self.writeNodepoolInfo(nodelist)
if self.label.ready_script:
self.runReadyScript(nodelist)
# Do this before adding to jenkins to avoid a race where
# Jenkins might immediately use the node before we've updated
# the state:
@ -415,6 +425,55 @@ class NodeLauncher(threading.Thread):
params = dict(NODE=self.node.nodename)
jenkins.startBuild(self.target.jenkins_test_job, params)
def writeNodepoolInfo(self, nodelist):
    """Write node-group metadata and a shared SSH key to every node.

    A fresh 2048-bit RSA key is generated once per call and the same
    key pair is written to each node in ``nodelist``, together with the
    node's role and the addresses of the primary node and sub-nodes.

    :param nodelist: iterable of ``(role, node)`` pairs; ``role`` is
        written verbatim to ``/etc/nodepool/role`` and ``node.ip`` is
        the address connected to.
    :raises Exception: if an SSH login to any node fails.
    """
    key = paramiko.RSAKey.generate(2048)
    public_key = key.get_name() + ' ' + key.get_base64()

    for role, n in nodelist:
        connect_kwargs = dict(key_filename=self.image.private_key)
        host = utils.ssh_connect(n.ip, self.image.username,
                                 connect_kwargs=connect_kwargs,
                                 timeout=self.timeout)
        if not host:
            raise Exception("Unable to log in via SSH")

        ftp = host.client.open_sftp()
        # Release the SFTP session even when one of the writes fails.
        try:
            self._writeRemoteFile(ftp, '/etc/nodepool/role',
                                  [role + '\n'])
            self._writeRemoteFile(ftp, '/etc/nodepool/primary_node',
                                  [self.node.ip + '\n'])
            self._writeRemoteFile(
                ftp, '/etc/nodepool/sub_nodes',
                [subnode.ip + '\n' for subnode in self.node.subnodes])

            # NOTE(review): the private key is created with whatever
            # mode the SFTP server applies by default -- confirm that
            # the ready script tightens permissions before using it.
            f = ftp.open('/etc/nodepool/id_rsa', 'w')
            try:
                key.write_private_key(f)
            finally:
                f.close()

            # Terminate the line so the file can be appended safely to
            # files such as authorized_keys.
            self._writeRemoteFile(ftp, '/etc/nodepool/id_rsa.pub',
                                  [public_key + '\n'])
        finally:
            ftp.close()

def _writeRemoteFile(self, ftp, path, chunks):
    """Write each string in ``chunks`` to ``path`` over ``ftp``.

    The remote file is always closed, even if a write raises.
    """
    f = ftp.open(path, 'w')
    try:
        for chunk in chunks:
            f.write(chunk)
    finally:
        f.close()
def runReadyScript(self, nodelist):
    """Run the label's ready script on every node in ``nodelist``.

    Every variable in this process's environment whose name starts with
    ``NODEPOOL_`` is passed to the script as a ``NAME=value`` prefix on
    the command line.  The script is run from ``/opt/nodepool-scripts``.

    :param nodelist: iterable of ``(role, node)`` pairs; only
        ``node.ip`` is used here.
    :raises Exception: if an SSH login to any node fails.
    """
    # shlex.quote on Python 3, pipes.quote on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # Shell-quote each value so that spaces, quotes, '$' and backticks
    # are handed to the script literally rather than being interpreted
    # by the remote shell.
    env_vars = ''.join(' %s=%s' % (k, quote(v))
                       for k, v in os.environ.items()
                       if k.startswith('NODEPOOL_'))
    # The command is the same for every node, so build it once.
    command = ("cd /opt/nodepool-scripts && %s ./%s" %
               (env_vars, self.label.ready_script))

    for _role, n in nodelist:
        connect_kwargs = dict(key_filename=self.image.private_key)
        host = utils.ssh_connect(n.ip, self.image.username,
                                 connect_kwargs=connect_kwargs,
                                 timeout=self.timeout)
        if not host:
            raise Exception("Unable to log in via SSH")
        host.ssh("run ready script", command)
class SubNodeLauncher(threading.Thread):
log = logging.getLogger("nodepool.SubNodeLauncher")
@ -703,6 +762,11 @@ class ImageUpdater(threading.Thread):
raise Exception("Unable to log in via SSH")
host.ssh("make scripts dir", "mkdir -p scripts")
# /etc/nodepool is world writable because by the time we write
# the contents after the node is launched, we may not have
# sudo access any more.
host.ssh("make config dir", "sudo mkdir /etc/nodepool")
host.ssh("chmod config dir", "sudo chmod 0777 /etc/nodepool")
for fname in os.listdir(self.scriptdir):
path = os.path.join(self.scriptdir, fname)
if not os.path.isfile(path):
@ -832,6 +896,7 @@ class NodePool(threading.Thread):
l.image = label['image']
l.min_ready = label['min-ready']
l.subnodes = label.get('subnodes', 0)
l.ready_script = label.get('ready-script')
l.providers = {}
for provider in label['providers']:
p = LabelProvider()

View File

@ -20,6 +20,7 @@ labels:
- name: fake-provider
- name: multi-fake
image: nodepool-fake
ready-script: multinode_setup.sh
subnodes: 2
min-ready: 2
providers: