Speed up log file fetching tasks

This replaces ansible loop tasks which rename log files to have a .txt
suffix with an ansible module task. The reason for this is each ansible
loop iteration can be very slow; up to 3 seconds per task. When
many log files need to be renamed this adds up quickly. The module can
do all of the renames in a single python process that should be much
quicker.

Note the old interface expected a list of extensions to rename. It then
converted that list of extensions into a regex used to find the files to
rename. The new code does not use regexes. It is possible someone abused
the old interface to hijack the regex and do something extra fun, but
that was not documented nor likely to be safe.

Change-Id: I7033a862d7e42b3acd8ad264aefca50685b317ff
This commit is contained in:
Clark Boylan 2022-08-31 11:16:56 -07:00
parent 1f78388328
commit e05b55fa67
5 changed files with 217 additions and 37 deletions

View File

View File

View File

@ -0,0 +1,155 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
ANSIBLE_METADATA = {
'metadata_version': '1.0',
'status': ['preview'],
'supported_by': 'community'
}
DOCUMENTATION = '''
---
module: stage_output_renames
short_description: Rename file extensions for stage-output
description:
- "Renames file extensions for stage-output quickly."
options:
extensions:
description:
- Dict of file extensions to be replaced with .txt when staging.
key is extension and value is boolean determining if it should
be replaced.
required: true
rename_dir:
description:
- The base dir to rename files in.
required: true
author:
- Clark Boylan
'''
EXAMPLES = '''
- name: Stage output rename extensions
stage_output_renames:
extensions:
conf: true
log: true
py: false
rename_dir: '/home/zuul/logs'
'''
RETURN = '''
msg:
description: The output message from the module.
renamed:
description: Dict of old: new file names.
errors:
description: Dict of old: error strings.
'''
from ansible.module_utils.basic import AnsibleModule # noqa
import os
import os.path
def rename_file(old_path):
# Only rename the file if it is a normal file
if os.path.isfile(old_path) and not os.path.islink(old_path):
path, ext = old_path.rsplit('.', 1)
new_path = path + '_' + ext + '.txt'
# Only rename if the target doesn't already exist
if not os.path.isfile(new_path) and \
not os.path.isdir(new_path) and \
not os.path.islink(new_path):
try:
os.rename(old_path, new_path)
except Exception as e:
# TODO Do we need to distinguish between these cases?
return None, str(e)
return new_path, None
return None, "Target path exists."
return None, "Source path does not exist."
def rename_exts(path, exts):
results = dict(
renamed=dict(),
errors=dict(),
)
for root, dirs, files in os.walk(path):
for f in files:
for e in exts:
if f.endswith(e):
full_path = os.path.join(root, f)
new_path, error = rename_file(full_path)
if error:
results["errors"][full_path] = error
elif new_path:
results["renamed"][full_path] = new_path
break
return results
def run_module():
module_args = dict(
extensions=dict(type='dict', required=True),
rename_dir=dict(type='str', required=True),
)
result = dict(
changed=False,
)
module = AnsibleModule(
argument_spec=module_args,
supports_check_mode=False
)
rename_dir = module.params['rename_dir']
if not os.path.isdir(rename_dir):
module.fail_json(msg='Rename dir %s is not a dir or does not exist' %
rename_dir, **result)
extensions_dict = module.params['extensions']
extensions_list = ['.' + k for k, v in extensions_dict.items()
if module.boolean(v)]
rename_result = rename_exts(rename_dir, extensions_list)
result.update(rename_result)
if result['renamed']:
result['changed'] = True
if result['errors']:
module.fail_json(msg='Failed to rename all requested files', **result)
else:
module.exit_json(msg='Renamed files for staging.', **result)
def main():
run_module()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import os.path
import testtools
import fixtures
from .stage_output_renames import rename_exts
class TestStageOutputRenames(testtools.TestCase):
def test_rename_file(self):
rename_root = self.useFixture(fixtures.TempDir()).path
subdir = os.path.join(rename_root, 'subdir')
os.mkdir(subdir)
renamed_file1 = os.path.join(rename_root, 'foo.log')
renamed_file2 = os.path.join(subdir, 'bar.something.log')
ignored_file = os.path.join(subdir, 'foo.txt')
with open(renamed_file1, 'w') as f:
f.write("This is a test file: renamed 1")
with open(renamed_file2, 'w') as f:
f.write("This is a test file: renamed 2")
with open(ignored_file, 'w') as f:
f.write("This is a test file: ignored")
results = rename_exts(rename_root, ['.log', '.notused'])
# Check the files we expect are present
self.assertTrue(
os.path.isfile('_'.join(renamed_file1.rsplit('.', 1)) + '.txt'))
self.assertTrue(
os.path.isfile('_'.join(renamed_file2.rsplit('.', 1)) + '.txt'))
self.assertTrue(os.path.isfile(ignored_file))
# Check the files we don't expect are gone
self.assertFalse(os.path.isfile(renamed_file1))
self.assertFalse(os.path.isfile(renamed_file2))
self.assertFalse(
os.path.isfile('_'.join(ignored_file.rsplit('.', 1)) + '.txt'))
self.assertFalse(results['errors'])
self.assertEqual(len(results['renamed'].items()), 2)

View File

@ -15,25 +15,6 @@
failed_when: false
register: sudo_result
- name: Build the extensions list from a dict (or empty)
set_fact:
extension_list: >
{% set extensions = ['__do_not_match__'] -%}
{% if extensions_to_txt -%}
{% for extension, extension_bool in extensions_to_txt.items() -%}
{% if extension_bool -%}
{% set _ = extensions.append(extension) -%}
{% endif -%}
{% endfor -%}
{% endif -%}
{{- extensions -}}
- name: Build the extensions regular expression
# https://github.com/ansible/ansible-lint/issues/2241
# noqa var-spacing
set_fact:
extensions_regex: "^(.*)\\.({{ extension_list | join('|') }})$"
# TODO(andreaf) We might want to enforce that zj_source.value is a valid value
# in docs, artifacts, logs. Null case already handled.
# NOTE(andreaf) Files or folders that start with a '.' are renamed to starting
@ -90,24 +71,13 @@
recurse: yes
become: "{{ sudo_result.rc == 0 }}"
- name: Discover log files that match extension_list
find:
paths: "{{ stage_dir }}/logs"
patterns: "{{ extensions_regex }}"
use_regex: true
recurse: true
file_type: 'file'
register: log_files_to_rename
- name: Rename log files that match extension_list
shell: "mv {{ zj_log_file.path }} {{ zj_log_file.path | regex_replace(extensions_regex, '\\1_\\2.txt') }}"
with_items: "{{ log_files_to_rename.files }}"
loop_control:
loop_var: zj_log_file
args:
chdir: "{{ stage_dir }}/logs"
tags:
- skip_ansible_lint
# Do this in a module as looping tasks with ansible is very slow
# with large lists of files. Up to three seconds per file has been observed.
- name: Rename log files that match extensions_to_txt
stage_output_renames:
extensions: "{{ extensions_to_txt }}"
rename_dir: "{{ stage_dir }}/logs"
when: extensions_to_txt is defined and extensions_to_txt is not none
- name: Collect log files
block: