partition deleted immediately after creation
There is a race between agent_audit and applying partitions manifest: 1. agent_audit starts to collect partition information for the current node 2. at the same time there is a request to apply partitions manifest for a new configuration; exec. call for running puppet is not eventlet friendly and blocks the agent including the in-progress audit 3. partitions are created and an update is sent back to conductor that sets new partition status "Ready" 4. agent audit resumes execution and sends an update with the partition info status collected before partitions manifest ran (without the new partition) 5. conductor doesn't find new partition in status update and removes it from the database Add lock around agent_audit() and config_apply_runtime_manifests() to prevent them from running both at the same time. Add lock in manage_partitions's run() and agent's _update_disk_partitions() to prevent agent from running in case partitions manifest is applied by an external trigger (not by sysinv-agent). Refactor common ipartition_update_by_ihost( ..., ipartition_get()) code. Closes-Bug: #1790159 Change-Id: I0c730f9c249810d7eea5e3192c819c498fe30602
This commit is contained in:
parent
f19dd0498f
commit
ca5505bee9
|
@ -102,6 +102,8 @@ FIRST_BOOT_FLAG = os.path.join(
|
|||
|
||||
PUPPET_HIERADATA_PATH = os.path.join(tsc.PUPPET_PATH, 'hieradata')
|
||||
|
||||
LOCK_AGENT_ACTION = 'agent-exclusive-action'
|
||||
|
||||
|
||||
class FakeGlobalSectionHead(object):
|
||||
def __init__(self, fp):
|
||||
|
@ -837,19 +839,8 @@ class AgentManager(service.PeriodicService):
|
|||
LOG.exception("Sysinv Agent exception updating idisk conductor.")
|
||||
pass
|
||||
|
||||
ipartition = self._ipartition_operator.ipartition_get()
|
||||
try:
|
||||
rpcapi.ipartition_update_by_ihost(icontext,
|
||||
ihost['uuid'],
|
||||
ipartition)
|
||||
except AttributeError:
|
||||
# safe to ignore during upgrades
|
||||
LOG.warn("Skip updating ipartition conductor. "
|
||||
"Upgrade in progress?")
|
||||
except exception.SysinvException:
|
||||
LOG.exception("Sysinv Agent exception updating ipartition"
|
||||
" conductor.")
|
||||
pass
|
||||
self._update_disk_partitions(rpcapi, icontext,
|
||||
ihost['uuid'], force_update=True)
|
||||
|
||||
ipv = self._ipv_operator.ipv_get()
|
||||
try:
|
||||
|
@ -1025,6 +1016,27 @@ class AgentManager(service.PeriodicService):
|
|||
return False
|
||||
return True
|
||||
|
||||
@utils.synchronized(constants.PARTITION_MANAGE_LOCK)
|
||||
def _update_disk_partitions(self, rpcapi, icontext,
|
||||
host_uuid, force_update=False):
|
||||
ipartition = self._ipartition_operator.ipartition_get()
|
||||
if not force_update:
|
||||
if self._prev_partition == ipartition:
|
||||
return
|
||||
self._prev_partition = ipartition
|
||||
try:
|
||||
rpcapi.ipartition_update_by_ihost(
|
||||
icontext, host_uuid, ipartition)
|
||||
except AttributeError:
|
||||
# safe to ignore during upgrades
|
||||
LOG.warn("Skip updating ipartition conductor. "
|
||||
"Upgrade in progress?")
|
||||
except exception.SysinvException:
|
||||
LOG.exception("Sysinv Agent exception updating "
|
||||
"ipartition conductor.")
|
||||
if not force_update:
|
||||
self._prev_partition = None
|
||||
|
||||
@periodic_task.periodic_task(spacing=CONF.agent.audit_interval,
|
||||
run_immediately=True)
|
||||
def _agent_audit(self, context):
|
||||
|
@ -1032,6 +1044,7 @@ class AgentManager(service.PeriodicService):
|
|||
self.agent_audit(context, host_uuid=self._ihost_uuid,
|
||||
force_updates=None)
|
||||
|
||||
@utils.synchronized(LOCK_AGENT_ACTION, external=False)
|
||||
def agent_audit(self, context, host_uuid, force_updates, cinder_device=None):
|
||||
# perform inventory audit
|
||||
if self._ihost_uuid != host_uuid:
|
||||
|
@ -1199,23 +1212,7 @@ class AgentManager(service.PeriodicService):
|
|||
|
||||
# Update disk partitions
|
||||
if self._ihost_personality != constants.STORAGE:
|
||||
ipartition = self._ipartition_operator.ipartition_get()
|
||||
if ((self._prev_partition is None) or
|
||||
(self._prev_partition != ipartition)):
|
||||
self._prev_partition = ipartition
|
||||
try:
|
||||
rpcapi.ipartition_update_by_ihost(icontext,
|
||||
self._ihost_uuid,
|
||||
ipartition)
|
||||
except AttributeError:
|
||||
# safe to ignore during upgrades
|
||||
LOG.warn("Skip updating ipartition conductor. "
|
||||
"Upgrade in progress?")
|
||||
except exception.SysinvException:
|
||||
LOG.exception("Sysinv Agent exception updating "
|
||||
"ipartition conductor.")
|
||||
self._prev_partition = None
|
||||
pass
|
||||
self._update_disk_partitions(rpcapi, icontext, self._ihost_uuid)
|
||||
|
||||
# Update physical volumes
|
||||
ipv = self._ipv_operator.ipv_get(cinder_device=cinder_device)
|
||||
|
@ -1357,6 +1354,7 @@ class AgentManager(service.PeriodicService):
|
|||
self._update_config_applied(iconfig_uuid)
|
||||
self._report_config_applied(context)
|
||||
|
||||
@utils.synchronized(LOCK_AGENT_ACTION, external=False)
|
||||
def config_apply_runtime_manifest(self, context, config_uuid, config_dict):
|
||||
"""Asynchronously, have the agent apply the runtime manifest with the
|
||||
list of supplied tasks.
|
||||
|
|
|
@ -859,6 +859,11 @@ CONF.register_cli_opt(
|
|||
handler=add_action_parsers))
|
||||
|
||||
|
||||
@utils.synchronized(constants.PARTITION_MANAGE_LOCK)
|
||||
def run(action, data, mode, pfile):
|
||||
action(data, mode, pfile)
|
||||
|
||||
|
||||
def main(argv):
|
||||
sysinv_service.prepare_service(argv)
|
||||
global LOG
|
||||
|
@ -873,7 +878,8 @@ def main(argv):
|
|||
"data": CONF.action.data})
|
||||
LOG.info(msg)
|
||||
print msg
|
||||
CONF.action.func(CONF.action.data, CONF.action.mode, CONF.action.pfile)
|
||||
run(CONF.action.func, CONF.action.data,
|
||||
CONF.action.mode, CONF.action.pfile)
|
||||
else:
|
||||
LOG.error(_("Unknown action: %(action)") % {"action":
|
||||
CONF.action.name})
|
||||
|
|
|
@ -1125,6 +1125,8 @@ PARTITION_NAME_PV = "LVM Physical Volume"
|
|||
PARTITION_TABLE_GPT = "gpt"
|
||||
PARTITION_TABLE_MSDOS = "msdos"
|
||||
|
||||
PARTITION_MANAGE_LOCK = "partition-manage"
|
||||
|
||||
# Optional services
|
||||
ALL_OPTIONAL_SERVICES = [SERVICE_TYPE_CINDER, SERVICE_TYPE_MURANO,
|
||||
SERVICE_TYPE_MAGNUM, SERVICE_TYPE_SWIFT,
|
||||
|
|
|
@ -31,11 +31,13 @@ import errno
|
|||
import functools
|
||||
import fcntl
|
||||
import glob
|
||||
import grp
|
||||
import hashlib
|
||||
import itertools as it
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import pwd
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
|
@ -1274,8 +1276,25 @@ def bytes_to_MiB(bytes_number):
|
|||
return bytes_number / float(1024 ** 2)
|
||||
|
||||
|
||||
def check_lock_path():
|
||||
if os.path.isdir(constants.SYSINV_LOCK_PATH):
|
||||
return
|
||||
try:
|
||||
uid = pwd.getpwnam(constants.SYSINV_USERNAME).pw_uid
|
||||
gid = grp.getgrnam(constants.SYSINV_GRPNAME).gr_gid
|
||||
os.makedirs(constants.SYSINV_LOCK_PATH)
|
||||
os.chown(constants.SYSINV_LOCK_PATH, uid, gid)
|
||||
LOG.info("Created directory=%s" %
|
||||
constants.SYSINV_LOCK_PATH)
|
||||
|
||||
except OSError as e:
|
||||
LOG.exception("makedir %s OSError=%s encountered" %
|
||||
(constants.SYSINV_LOCK_PATH, e))
|
||||
|
||||
|
||||
def synchronized(name, external=True):
|
||||
if external:
|
||||
check_lock_path()
|
||||
lock_path = constants.SYSINV_LOCK_PATH
|
||||
else:
|
||||
lock_path = None
|
||||
|
|
|
@ -32,11 +32,9 @@ collection of inventory data for each host.
|
|||
import errno
|
||||
import filecmp
|
||||
import glob
|
||||
import grp
|
||||
import hashlib
|
||||
import httplib
|
||||
import os
|
||||
import pwd
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
|
@ -184,20 +182,7 @@ class ConductorManager(service.PeriodicService):
|
|||
|
||||
# create /var/run/sysinv if required. On DOR, the manifests
|
||||
# may not run to create this volatile directory.
|
||||
|
||||
if not os.path.isdir(constants.SYSINV_LOCK_PATH):
|
||||
try:
|
||||
uid = pwd.getpwnam(constants.SYSINV_USERNAME).pw_uid
|
||||
gid = grp.getgrnam(constants.SYSINV_GRPNAME).gr_gid
|
||||
os.makedirs(constants.SYSINV_LOCK_PATH)
|
||||
os.chown(constants.SYSINV_LOCK_PATH, uid, gid)
|
||||
LOG.info("Created directory=%s" %
|
||||
constants.SYSINV_LOCK_PATH)
|
||||
|
||||
except OSError as e:
|
||||
LOG.exception("makedir %s OSError=%s encountered" %
|
||||
(constants.SYSINV_LOCK_PATH, e))
|
||||
pass
|
||||
cutils.check_lock_path()
|
||||
|
||||
system = self._create_default_system()
|
||||
|
||||
|
|
Loading…
Reference in New Issue