XenAPI: Refactor rotate_xen_guest_logs to avoid races

Moving console logs to a temporary file is a clear race when Nova tries to
access a file it might not be there.

We need to keep the readable file in place at all times.  Atomic operations
like mv are fine as if the file was opened before the mv operation Linux
will keep the contents available until the file is closed by Nova's plugin.

Futher, we need to be able to report the console logs from domains that have
been shutdown but still exist on the host.  Ensure we keep track of the domain
that we last booted with

Change-Id: Icfff9886d79f788be871bf28c8b729a57dcd94d2
Fixes-bug: 1466914
This commit is contained in:
Bob Ball 2015-06-19 17:53:11 +01:00
parent abbfdee17d
commit f985a11bc6
4 changed files with 43 additions and 37 deletions

View File

@ -200,20 +200,20 @@ class InjectAutoDiskConfigTestCase(VMOpsTestBase):
class GetConsoleOutputTestCase(VMOpsTestBase):
def test_get_console_output_works(self):
self.mox.StubOutWithMock(self.vmops, '_get_dom_id')
self.mox.StubOutWithMock(self.vmops, '_get_last_dom_id')
instance = {"name": "dummy"}
self.vmops._get_dom_id(instance, check_rescue=True).AndReturn(42)
self.vmops._get_last_dom_id(instance, check_rescue=True).AndReturn(42)
self.mox.ReplayAll()
self.assertEqual("dom_id: 42", self.vmops.get_console_output(instance))
def test_get_console_output_throws_nova_exception(self):
self.mox.StubOutWithMock(self.vmops, '_get_dom_id')
self.mox.StubOutWithMock(self.vmops, '_get_last_dom_id')
instance = {"name": "dummy"}
# dom_id=0 used to trigger exception in fake XenAPI
self.vmops._get_dom_id(instance, check_rescue=True).AndReturn(0)
self.vmops._get_last_dom_id(instance, check_rescue=True).AndReturn(0)
self.mox.ReplayAll()
self.assertRaises(exception.NovaException,
@ -278,6 +278,7 @@ class SpawnTestCase(VMOpsTestBase):
self.mox.StubOutWithMock(self.vmops, '_remove_hostname')
self.mox.StubOutWithMock(self.vmops.firewall_driver,
'apply_instance_filter')
self.mox.StubOutWithMock(self.vmops, '_update_last_dom_id')
def _test_spawn(self, name_label_param=None, block_device_info_param=None,
rescue=False, include_root_vdi=True, throw_exception=None,
@ -389,6 +390,7 @@ class SpawnTestCase(VMOpsTestBase):
steps)
self.vmops._start(instance, vm_ref)
self.vmops._wait_for_instance_to_start(instance, vm_ref)
self.vmops._update_last_dom_id(vm_ref)
step += 1
self.vmops._update_instance_progress(context, instance, step, steps)
@ -505,6 +507,7 @@ class SpawnTestCase(VMOpsTestBase):
if power_on:
self.vmops._start(instance, vm_ref)
self.vmops._wait_for_instance_to_start(instance, vm_ref)
self.vmops._update_last_dom_id(vm_ref)
self.vmops.firewall_driver.apply_instance_filter(instance,
network_info)

View File

@ -1258,6 +1258,12 @@ iface eth0 inet6 static
self._test_spawn_fails_silently_with(exception.AgentError,
value=error)
def test_spawn_sets_last_dom_id(self):
self._test_spawn(IMAGE_VHD, None, None,
os_type="linux", architecture="x86-64")
self.assertEqual(self.vm['domid'],
self.vm['other_config']['last_dom_id'])
def test_rescue(self):
instance = self._create_instance(spawn=False, obj=True)
xenapi_fake.create_vm(instance['name'], 'Running')

View File

@ -394,6 +394,11 @@ class VMOps(object):
return vdis
def _update_last_dom_id(self, vm_ref):
other_config = self._session.VM.get_other_config(vm_ref)
other_config['last_dom_id'] = self._session.VM.get_domid(vm_ref)
self._session.VM.set_other_config(vm_ref, other_config)
def spawn(self, context, instance, image_meta, injected_files,
admin_password, network_info=None, block_device_info=None,
name_label=None, rescue=False):
@ -553,6 +558,7 @@ class VMOps(object):
if power_on:
self._start(instance, vm_ref)
self._wait_for_instance_to_start(instance, vm_ref)
self._update_last_dom_id(vm_ref)
@step
def configure_booted_instance_step(undo_mgr, vm_ref):
@ -1753,7 +1759,7 @@ class VMOps(object):
def get_console_output(self, instance):
"""Return last few lines of instance console."""
dom_id = self._get_dom_id(instance, check_rescue=True)
dom_id = self._get_last_dom_id(instance, check_rescue=True)
try:
raw_console_data = self._session.call_plugin('console',
@ -2001,6 +2007,13 @@ class VMOps(object):
raise exception.InstanceNotFound(instance_id=instance['name'])
return domid
def _get_last_dom_id(self, instance=None, vm_ref=None, check_rescue=False):
vm_ref = vm_ref or self._get_vm_opaque_ref(instance, check_rescue)
other_config = self._session.call_xenapi("VM.get_other_config", vm_ref)
if 'last_dom_id' not in other_config:
raise exception.InstanceNotFound(instance_id=instance['name'])
return other_config['last_dom_id']
def _add_to_param_xenstore(self, vm_ref, key, val):
"""Takes a key/value pair and adds it to the xenstore parameter
record for the given vm instance. If the key exists in xenstore,

View File

@ -18,48 +18,32 @@ log_dir="/var/log/xen/guest"
kb=1024
max_size_bytes=$(($kb*$kb))
truncated_size_bytes=$((5*$kb))
list_domains=/opt/xensource/bin/list_domains
log_file_base="${log_dir}/console."
tmp_file_base="${log_dir}/tmp.console."
# Ensure logging is setup correctly for all domains
xenstore-write /local/logconsole/@ "${log_file_base}%d"
# Move logs we want to keep
domains=$($list_domains | sed '/^id*/d' | sed 's/|.*|.*$//g' | xargs)
for i in $domains; do
log="${log_file_base}$i"
tmp="${tmp_file_base}$i"
mv $log $tmp || true
# Ensure the last_dom_id is set + updated for all running VMs
for vm in $(xe vm-list power-state=running --minimal | tr ',' ' '); do
xe vm-param-set uuid=$vm other-config:last_dom_id=$(xe vm-param-get uuid=$vm param-name=dom-id)
done
# Delete all console logs,
# mostly to remove logs from recently killed domains
rm -f ${log_dir}/console.*
# Get the last_dom_id for all VMs
valid_last_dom_ids=$(xe vm-list params=other-config --minimal | tr ';,' '\n\n' | grep last_dom_id | sed -e 's/last_dom_id: //g' | xargs)
# Reload domain list, in case it changed
# (note we may have just deleted a new console log)
domains=$($list_domains | sed '/^id*/d' | sed 's/|.*|.*$//g' | xargs)
for i in $domains; do
log="${log_file_base}$i"
tmp="${tmp_file_base}$i"
# Remove console files that do not correspond to valid last_dom_id's
allowed_consoles=".*console.\(${valid_last_dom_ids// /\\|}\)$"
find $log_dir -type f -not -regex $allowed_consoles -delete
if [ -e "$tmp" ]; then
size=$(stat -c%s "$tmp")
# Trim the log if required
if [ "$size" -gt "$max_size_bytes" ]; then
tail -c $truncated_size_bytes $tmp > $log || true
else
mv $tmp $log || true
fi
fi
# Truncate all remaining logs
for log in `find $log_dir -type f -regex '.*console.*' -size +${max_size_bytes}c`; do
tmp="$log.tmp"
tail -c $truncated_size_bytes $log > $tmp
mv -f $tmp $log
# Notify xen that it needs to reload the file
xenstore-write /local/logconsole/$i $log
xenstore-rm /local/logconsole/$i
domid=${log##*.}
xenstore-write /local/logconsole/$domid $log
xenstore-rm /local/logconsole/$domid
done
# Delete all the tmp files
rm -f ${tmp_file_base}* || true