Uniquely identify launchers

Previously launchers were identified as:

  hostname-pid-provider-pool

The problem with this setup is that, when using containers with host
networking and namespaced pids, you can end up with multiple launchers
using the same launcher id for the same provider pool. This problem also
arises if you are replacing launcher1.example.com with
launcher1.otherexample.com, since both share the hostname launcher1.

To fix this we update the launcher ids to be:

  fqdn-provider-pool-randomuuid

These ids are already not expected to survive beyond the lifetime of any
single process as they use a pid value. This means we can replace this
pid value with a randomly generated uuid value instead. We also use the
host fqdn instead of hostname to be less ambiguous in the case where two
different hosts with different fqdns share a hostname.

Change-Id: I419718e63b31b12d8dfe971031cd8a81ad582480
This commit is contained in:
Clark Boylan 2021-03-09 13:58:01 -08:00
parent 446c0ca7d2
commit 66b6c27dbf
2 changed files with 10 additions and 7 deletions

View File

@ -21,6 +21,7 @@ import os.path
import socket
import threading
import time
import uuid
from kazoo import exceptions as kze
@ -137,9 +138,9 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
self.request_handlers = []
self.watermark_sleep = nodepool.watermark_sleep
self.zk = self.getZK()
self.launcher_id = "%s-%s-%s" % (socket.gethostname(),
os.getpid(),
self.name)
self.launcher_id = "%s-%s-%s" % (socket.getfqdn(),
self.name,
uuid.uuid4().hex)
stats.StatsReporter.__init__(self)
# ---------------------------------------------------------------

View File

@ -13,6 +13,7 @@
import testtools
import time
import uuid
from nodepool import exceptions as npe
from nodepool import tests
@ -564,7 +565,7 @@ class TestZooKeeper(tests.DBTestCase):
def test_registerLauncher(self):
launcher = zk.Launcher()
launcher.id = "launcher-000-001"
launcher.id = "launcher-Poolworker.provider-main-" + uuid.uuid4().hex
self.zk.registerLauncher(launcher)
launchers = self.zk.getRegisteredLaunchers()
self.assertEqual(1, len(launchers))
@ -572,7 +573,7 @@ class TestZooKeeper(tests.DBTestCase):
def test_registerLauncher_safe_repeat(self):
launcher = zk.Launcher()
launcher.id = "launcher-000-001"
launcher.id = "launcher-Poolworker.provider-main-" + uuid.uuid4().hex
self.zk.registerLauncher(launcher)
self.zk.registerLauncher(launcher)
launchers = self.zk.getRegisteredLaunchers()
@ -1014,7 +1015,7 @@ class TestZKModel(tests.BaseTestCase):
o.public_ipv6 = '<ipv6>'
o.host_id = 'fake-host-id'
o.image_id = 'image-id'
o.launcher = 'launcher-id'
o.launcher = 'launcher-Poolworker.provider-main-' + uuid.uuid4().hex
o.external_id = 'ABCD'
o.hostname = 'xyz'
o.comment = 'comment'
@ -1048,6 +1049,7 @@ class TestZKModel(tests.BaseTestCase):
def test_Node_fromDict(self):
now = int(time.time())
node_id = '123'
launcher_id = 'launcher-Poolworker.provider-main-' + uuid.uuid4().hex
d = {
'state': zk.READY,
'state_time': now,
@ -1062,7 +1064,7 @@ class TestZKModel(tests.BaseTestCase):
'public_ipv6': '<ipv6>',
'host_id': 'fake-host-id',
'image_id': 'image-id',
'launcher': 'launcher-id',
'launcher': launcher_id,
'external_id': 'ABCD',
'hostname': 'xyz',
'comment': 'comment',