XenAPI: Refactor rotate_xen_guest_logs to avoid races
Moving console logs to a temporary file is a clear race when Nova tries to access a file it might not be there. We need to keep the readable file in place at all times. Atomic operations like mv are fine as if the file was opened before the mv operation Linux will keep the contents available until the file is closed by Nova's plugin. Futher, we need to be able to report the console logs from domains that have been shutdown but still exist on the host. Ensure we keep track of the domain that we last booted with Change-Id: Icfff9886d79f788be871bf28c8b729a57dcd94d2 Fixes-bug: 1466914
This commit is contained in:
parent
abbfdee17d
commit
f985a11bc6
|
@ -200,20 +200,20 @@ class InjectAutoDiskConfigTestCase(VMOpsTestBase):
|
|||
|
||||
class GetConsoleOutputTestCase(VMOpsTestBase):
|
||||
def test_get_console_output_works(self):
|
||||
self.mox.StubOutWithMock(self.vmops, '_get_dom_id')
|
||||
self.mox.StubOutWithMock(self.vmops, '_get_last_dom_id')
|
||||
|
||||
instance = {"name": "dummy"}
|
||||
self.vmops._get_dom_id(instance, check_rescue=True).AndReturn(42)
|
||||
self.vmops._get_last_dom_id(instance, check_rescue=True).AndReturn(42)
|
||||
self.mox.ReplayAll()
|
||||
|
||||
self.assertEqual("dom_id: 42", self.vmops.get_console_output(instance))
|
||||
|
||||
def test_get_console_output_throws_nova_exception(self):
|
||||
self.mox.StubOutWithMock(self.vmops, '_get_dom_id')
|
||||
self.mox.StubOutWithMock(self.vmops, '_get_last_dom_id')
|
||||
|
||||
instance = {"name": "dummy"}
|
||||
# dom_id=0 used to trigger exception in fake XenAPI
|
||||
self.vmops._get_dom_id(instance, check_rescue=True).AndReturn(0)
|
||||
self.vmops._get_last_dom_id(instance, check_rescue=True).AndReturn(0)
|
||||
self.mox.ReplayAll()
|
||||
|
||||
self.assertRaises(exception.NovaException,
|
||||
|
@ -278,6 +278,7 @@ class SpawnTestCase(VMOpsTestBase):
|
|||
self.mox.StubOutWithMock(self.vmops, '_remove_hostname')
|
||||
self.mox.StubOutWithMock(self.vmops.firewall_driver,
|
||||
'apply_instance_filter')
|
||||
self.mox.StubOutWithMock(self.vmops, '_update_last_dom_id')
|
||||
|
||||
def _test_spawn(self, name_label_param=None, block_device_info_param=None,
|
||||
rescue=False, include_root_vdi=True, throw_exception=None,
|
||||
|
@ -389,6 +390,7 @@ class SpawnTestCase(VMOpsTestBase):
|
|||
steps)
|
||||
self.vmops._start(instance, vm_ref)
|
||||
self.vmops._wait_for_instance_to_start(instance, vm_ref)
|
||||
self.vmops._update_last_dom_id(vm_ref)
|
||||
step += 1
|
||||
self.vmops._update_instance_progress(context, instance, step, steps)
|
||||
|
||||
|
@ -505,6 +507,7 @@ class SpawnTestCase(VMOpsTestBase):
|
|||
if power_on:
|
||||
self.vmops._start(instance, vm_ref)
|
||||
self.vmops._wait_for_instance_to_start(instance, vm_ref)
|
||||
self.vmops._update_last_dom_id(vm_ref)
|
||||
|
||||
self.vmops.firewall_driver.apply_instance_filter(instance,
|
||||
network_info)
|
||||
|
|
|
@ -1258,6 +1258,12 @@ iface eth0 inet6 static
|
|||
self._test_spawn_fails_silently_with(exception.AgentError,
|
||||
value=error)
|
||||
|
||||
def test_spawn_sets_last_dom_id(self):
|
||||
self._test_spawn(IMAGE_VHD, None, None,
|
||||
os_type="linux", architecture="x86-64")
|
||||
self.assertEqual(self.vm['domid'],
|
||||
self.vm['other_config']['last_dom_id'])
|
||||
|
||||
def test_rescue(self):
|
||||
instance = self._create_instance(spawn=False, obj=True)
|
||||
xenapi_fake.create_vm(instance['name'], 'Running')
|
||||
|
|
|
@ -394,6 +394,11 @@ class VMOps(object):
|
|||
|
||||
return vdis
|
||||
|
||||
def _update_last_dom_id(self, vm_ref):
|
||||
other_config = self._session.VM.get_other_config(vm_ref)
|
||||
other_config['last_dom_id'] = self._session.VM.get_domid(vm_ref)
|
||||
self._session.VM.set_other_config(vm_ref, other_config)
|
||||
|
||||
def spawn(self, context, instance, image_meta, injected_files,
|
||||
admin_password, network_info=None, block_device_info=None,
|
||||
name_label=None, rescue=False):
|
||||
|
@ -553,6 +558,7 @@ class VMOps(object):
|
|||
if power_on:
|
||||
self._start(instance, vm_ref)
|
||||
self._wait_for_instance_to_start(instance, vm_ref)
|
||||
self._update_last_dom_id(vm_ref)
|
||||
|
||||
@step
|
||||
def configure_booted_instance_step(undo_mgr, vm_ref):
|
||||
|
@ -1753,7 +1759,7 @@ class VMOps(object):
|
|||
|
||||
def get_console_output(self, instance):
|
||||
"""Return last few lines of instance console."""
|
||||
dom_id = self._get_dom_id(instance, check_rescue=True)
|
||||
dom_id = self._get_last_dom_id(instance, check_rescue=True)
|
||||
|
||||
try:
|
||||
raw_console_data = self._session.call_plugin('console',
|
||||
|
@ -2001,6 +2007,13 @@ class VMOps(object):
|
|||
raise exception.InstanceNotFound(instance_id=instance['name'])
|
||||
return domid
|
||||
|
||||
def _get_last_dom_id(self, instance=None, vm_ref=None, check_rescue=False):
|
||||
vm_ref = vm_ref or self._get_vm_opaque_ref(instance, check_rescue)
|
||||
other_config = self._session.call_xenapi("VM.get_other_config", vm_ref)
|
||||
if 'last_dom_id' not in other_config:
|
||||
raise exception.InstanceNotFound(instance_id=instance['name'])
|
||||
return other_config['last_dom_id']
|
||||
|
||||
def _add_to_param_xenstore(self, vm_ref, key, val):
|
||||
"""Takes a key/value pair and adds it to the xenstore parameter
|
||||
record for the given vm instance. If the key exists in xenstore,
|
||||
|
|
|
@ -18,48 +18,32 @@ log_dir="/var/log/xen/guest"
|
|||
kb=1024
|
||||
max_size_bytes=$(($kb*$kb))
|
||||
truncated_size_bytes=$((5*$kb))
|
||||
list_domains=/opt/xensource/bin/list_domains
|
||||
|
||||
log_file_base="${log_dir}/console."
|
||||
tmp_file_base="${log_dir}/tmp.console."
|
||||
|
||||
# Ensure logging is setup correctly for all domains
|
||||
xenstore-write /local/logconsole/@ "${log_file_base}%d"
|
||||
|
||||
# Move logs we want to keep
|
||||
domains=$($list_domains | sed '/^id*/d' | sed 's/|.*|.*$//g' | xargs)
|
||||
for i in $domains; do
|
||||
log="${log_file_base}$i"
|
||||
tmp="${tmp_file_base}$i"
|
||||
mv $log $tmp || true
|
||||
# Ensure the last_dom_id is set + updated for all running VMs
|
||||
for vm in $(xe vm-list power-state=running --minimal | tr ',' ' '); do
|
||||
xe vm-param-set uuid=$vm other-config:last_dom_id=$(xe vm-param-get uuid=$vm param-name=dom-id)
|
||||
done
|
||||
|
||||
# Delete all console logs,
|
||||
# mostly to remove logs from recently killed domains
|
||||
rm -f ${log_dir}/console.*
|
||||
# Get the last_dom_id for all VMs
|
||||
valid_last_dom_ids=$(xe vm-list params=other-config --minimal | tr ';,' '\n\n' | grep last_dom_id | sed -e 's/last_dom_id: //g' | xargs)
|
||||
|
||||
# Reload domain list, in case it changed
|
||||
# (note we may have just deleted a new console log)
|
||||
domains=$($list_domains | sed '/^id*/d' | sed 's/|.*|.*$//g' | xargs)
|
||||
for i in $domains; do
|
||||
log="${log_file_base}$i"
|
||||
tmp="${tmp_file_base}$i"
|
||||
# Remove console files that do not correspond to valid last_dom_id's
|
||||
allowed_consoles=".*console.\(${valid_last_dom_ids// /\\|}\)$"
|
||||
find $log_dir -type f -not -regex $allowed_consoles -delete
|
||||
|
||||
if [ -e "$tmp" ]; then
|
||||
size=$(stat -c%s "$tmp")
|
||||
|
||||
# Trim the log if required
|
||||
if [ "$size" -gt "$max_size_bytes" ]; then
|
||||
tail -c $truncated_size_bytes $tmp > $log || true
|
||||
else
|
||||
mv $tmp $log || true
|
||||
fi
|
||||
fi
|
||||
# Truncate all remaining logs
|
||||
for log in `find $log_dir -type f -regex '.*console.*' -size +${max_size_bytes}c`; do
|
||||
tmp="$log.tmp"
|
||||
tail -c $truncated_size_bytes $log > $tmp
|
||||
mv -f $tmp $log
|
||||
|
||||
# Notify xen that it needs to reload the file
|
||||
xenstore-write /local/logconsole/$i $log
|
||||
xenstore-rm /local/logconsole/$i
|
||||
domid=${log##*.}
|
||||
xenstore-write /local/logconsole/$domid $log
|
||||
xenstore-rm /local/logconsole/$domid
|
||||
done
|
||||
|
||||
# Delete all the tmp files
|
||||
rm -f ${tmp_file_base}* || true
|
||||
|
|
Loading…
Reference in New Issue