Fix queue_manager in a containerized environment
This patch fixes the operation of queue_manager in a containerized environment by adding an additional check on the process start_time, expressed in clock ticks since boot. This way, we can detect a restart even when the PID remains unchanged, as is usual in containers, because the start_time is different. [1] https://www.man7.org/linux/man-pages//man5/proc_pid_stat.5.html From the man page above: (22) starttime %llu The time the process started after system boot. Before Linux 2.6, this value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks (divide by sysconf(_SC_CLK_TCK)). Closes-Bug: #2078935 Change-Id: I9e22433ec039ad6783593d9cb7fbe22c9090534e
This commit is contained in:
parent
d601f7aae0
commit
6790f702fa
@ -66,6 +66,13 @@ class QManager(object):
|
||||
# We use the process group to restart the counter on service restart
|
||||
self.pg = os.getpgrp()
|
||||
|
||||
# We need to also handle containerized deployments, so let's
|
||||
# parse start time (in clock ticks; jiffies before Linux 2.6) since system boot
|
||||
#
|
||||
# https://www.man7.org/linux/man-pages//man5/proc_pid_stat.5.html
|
||||
with open(f'/proc/{self.pg}/stat', 'r') as f:
|
||||
self.start_time = int(f.read().split()[21])
|
||||
|
||||
def get(self):
|
||||
lock_name = 'oslo_read_shm_%s_%s' % (self.hostname, self.processname)
|
||||
|
||||
@ -75,28 +82,32 @@ class QManager(object):
|
||||
# This function is thread and process safe thanks to lockutils
|
||||
try:
|
||||
with open(self.file_name, 'r') as f:
|
||||
pg, c = f.readline().split(':')
|
||||
pg, counter, start_time = f.readline().split(':')
|
||||
pg = int(pg)
|
||||
c = int(c)
|
||||
counter = int(counter)
|
||||
start_time = int(start_time)
|
||||
except (FileNotFoundError, ValueError):
|
||||
pg = self.pg
|
||||
c = 0
|
||||
counter = 0
|
||||
start_time = self.start_time
|
||||
|
||||
# Increment the counter
|
||||
if pg == self.pg:
|
||||
c += 1
|
||||
if pg == self.pg and start_time == self.start_time:
|
||||
counter += 1
|
||||
else:
|
||||
# The process group changed, maybe service restarted?
|
||||
# The process group is changed, or start time since system boot
|
||||
# differs. Maybe the service restarted?
|
||||
# Start over the counter
|
||||
c = 1
|
||||
counter = 1
|
||||
|
||||
# Write the new counter
|
||||
with open(self.file_name, 'w') as f:
|
||||
f.write(str(self.pg) + ':' + str(c))
|
||||
return c
|
||||
f.write(str(self.pg) + ':' + str(counter) + ':' +
|
||||
str(start_time))
|
||||
return counter
|
||||
|
||||
c = read_from_shm()
|
||||
return self.hostname + ":" + self.processname + ":" + str(c)
|
||||
counter = read_from_shm()
|
||||
return self.hostname + ":" + self.processname + ":" + str(counter)
|
||||
|
||||
|
||||
class MessageOperationsHandler(object):
|
||||
|
Loading…
x
Reference in New Issue
Block a user