From e05b55fa6749517f0abf0157f29f74bd55b0a5a7 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Wed, 31 Aug 2022 11:16:56 -0700 Subject: [PATCH] Speed up log file fetching tasks This replaces ansible loop tasks which rename log files to have a .txt suffix with an ansible module task. The reason for this is each ansible loop iteration can be very slow; up to 3 seconds per task. When many log files need to be renamed this adds up quickly. The module can do all of the renames in a single python process that should be much quicker. Note the old interface expected a list of extensions to rename. It then converted that list of extensions into a regex used to find the files to rename. The new code does not use regexes. It is possible someone abused the old interface to hijack the regex and do something extra fun, but that was not documented nor likely to be safe. Change-Id: I7033a862d7e42b3acd8ad264aefca50685b317ff --- roles/stage-output/__init__.py | 0 roles/stage-output/library/__init__.py | 0 .../library/stage_output_renames.py | 155 ++++++++++++++++++ .../library/test_stage_output_renames.py | 55 +++++++ roles/stage-output/tasks/main.yaml | 44 +---- 5 files changed, 217 insertions(+), 37 deletions(-) create mode 100644 roles/stage-output/__init__.py create mode 100644 roles/stage-output/library/__init__.py create mode 100644 roles/stage-output/library/stage_output_renames.py create mode 100644 roles/stage-output/library/test_stage_output_renames.py diff --git a/roles/stage-output/__init__.py b/roles/stage-output/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/roles/stage-output/library/__init__.py b/roles/stage-output/library/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/roles/stage-output/library/stage_output_renames.py b/roles/stage-output/library/stage_output_renames.py new file mode 100644 index 000000000..7e318f1b3 --- /dev/null +++ b/roles/stage-output/library/stage_output_renames.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +ANSIBLE_METADATA = { + 'metadata_version': '1.0', + 'status': ['preview'], + 'supported_by': 'community' +} + + +DOCUMENTATION = ''' +--- +module: stage_output_renames + +short_description: Rename file extensions for stage-output + +description: + - "Renames file extensions for stage-output quickly." + +options: + extensions: + description: + - Dict of file extensions to be replaced with .txt when staging. + key is extension and value is boolean determining if it should + be replaced. + required: true + rename_dir: + description: + - The base dir to rename files in. + required: true + +author: + - Clark Boylan +''' + + +EXAMPLES = ''' +- name: Stage output rename extensions + stage_output_renames: + extensions: + conf: true + log: true + py: false + rename_dir: '/home/zuul/logs' +''' + + +RETURN = ''' +msg: + description: The output message from the module. +renamed: + description: Dict of old: new file names. +errors: + description: Dict of old: error strings. +''' + + +from ansible.module_utils.basic import AnsibleModule # noqa + +import os +import os.path + + +def rename_file(old_path): + # Only rename the file if it is a normal file + if os.path.isfile(old_path) and not os.path.islink(old_path): + path, ext = old_path.rsplit('.', 1) + new_path = path + '_' + ext + '.txt' + # Only rename if the target doesn't already exist + if not os.path.isfile(new_path) and \ + not os.path.isdir(new_path) and \ + not os.path.islink(new_path): + try: + os.rename(old_path, new_path) + except Exception as e: + # TODO Do we need to distinguish between these cases? + return None, str(e) + return new_path, None + return None, "Target path exists." + return None, "Source path does not exist." + + +def rename_exts(path, exts): + results = dict( + renamed=dict(), + errors=dict(), + ) + for root, dirs, files in os.walk(path): + for f in files: + for e in exts: + if f.endswith(e): + full_path = os.path.join(root, f) + new_path, error = rename_file(full_path) + if error: + results["errors"][full_path] = error + elif new_path: + results["renamed"][full_path] = new_path + break + return results + + +def run_module(): + module_args = dict( + extensions=dict(type='dict', required=True), + rename_dir=dict(type='str', required=True), + ) + + result = dict( + changed=False, + ) + + module = AnsibleModule( + argument_spec=module_args, + supports_check_mode=False + ) + + rename_dir = module.params['rename_dir'] + + if not os.path.isdir(rename_dir): + module.fail_json(msg='Rename dir %s is not a dir or does not exist' % + rename_dir, **result) + + extensions_dict = module.params['extensions'] + extensions_list = ['.' + k for k, v in extensions_dict.items() + if module.boolean(v)] + + rename_result = rename_exts(rename_dir, extensions_list) + result.update(rename_result) + + if result['renamed']: + result['changed'] = True + if result['errors']: + module.fail_json(msg='Failed to rename all requested files', **result) + else: + module.exit_json(msg='Renamed files for staging.', **result) + + +def main(): + run_module() + + +if __name__ == '__main__': + main() diff --git a/roles/stage-output/library/test_stage_output_renames.py b/roles/stage-output/library/test_stage_output_renames.py new file mode 100644 index 000000000..67b414e8e --- /dev/null +++ b/roles/stage-output/library/test_stage_output_renames.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +import os +import os.path + +import testtools +import fixtures + +from .stage_output_renames import rename_exts + + +class TestStageOutputRenames(testtools.TestCase): + def test_rename_file(self): + rename_root = self.useFixture(fixtures.TempDir()).path + subdir = os.path.join(rename_root, 'subdir') + os.mkdir(subdir) + + renamed_file1 = os.path.join(rename_root, 'foo.log') + renamed_file2 = os.path.join(subdir, 'bar.something.log') + ignored_file = os.path.join(subdir, 'foo.txt') + + with open(renamed_file1, 'w') as f: + f.write("This is a test file: renamed 1") + with open(renamed_file2, 'w') as f: + f.write("This is a test file: renamed 2") + with open(ignored_file, 'w') as f: + f.write("This is a test file: ignored") + + results = rename_exts(rename_root, ['.log', '.notused']) + # Check the files we expect are present + self.assertTrue( + os.path.isfile('_'.join(renamed_file1.rsplit('.', 1)) + '.txt')) + self.assertTrue( + os.path.isfile('_'.join(renamed_file2.rsplit('.', 1)) + '.txt')) + self.assertTrue(os.path.isfile(ignored_file)) + # Check the files we don't expect are gone + self.assertFalse(os.path.isfile(renamed_file1)) + self.assertFalse(os.path.isfile(renamed_file2)) + self.assertFalse( + os.path.isfile('_'.join(ignored_file.rsplit('.', 1)) + '.txt')) + + self.assertFalse(results['errors']) + self.assertEqual(len(results['renamed'].items()), 2) diff --git a/roles/stage-output/tasks/main.yaml b/roles/stage-output/tasks/main.yaml index b5cbd30b0..ec07404f5 100644 --- a/roles/stage-output/tasks/main.yaml +++ b/roles/stage-output/tasks/main.yaml @@ -15,25 +15,6 @@ failed_when: false register: sudo_result -- name: Build the extensions list from a dict (or empty) - set_fact: - extension_list: > - {% set extensions = ['__do_not_match__'] -%} - {% if extensions_to_txt -%} - {% for extension, extension_bool in extensions_to_txt.items() -%} - {% if extension_bool -%} - {% set _ = extensions.append(extension) -%} - {% endif -%} - {% endfor -%} - {% endif -%} - {{- extensions -}} - -- name: Build the extensions regular expression - # https://github.com/ansible/ansible-lint/issues/2241 - # noqa var-spacing - set_fact: - extensions_regex: "^(.*)\\.({{ extension_list | join('|') }})$" - # TODO(andreaf) We might want to enforce that zj_source.value is a valid value # in docs, artifacts, logs. Null case already handled. # NOTE(andreaf) Files or folders that start with a '.' are renamed to starting @@ -90,24 +71,13 @@ recurse: yes become: "{{ sudo_result.rc == 0 }}" -- name: Discover log files that match extension_list - find: - paths: "{{ stage_dir }}/logs" - patterns: "{{ extensions_regex }}" - use_regex: true - recurse: true - file_type: 'file' - register: log_files_to_rename - -- name: Rename log files that match extension_list - shell: "mv {{ zj_log_file.path }} {{ zj_log_file.path | regex_replace(extensions_regex, '\\1_\\2.txt') }}" - with_items: "{{ log_files_to_rename.files }}" - loop_control: - loop_var: zj_log_file - args: - chdir: "{{ stage_dir }}/logs" - tags: - - skip_ansible_lint +# Do this in a module as looping tasks with ansible is very slow +# with large lists of files. Up to three seconds per file has been observed. +- name: Rename log files that match extensions_to_txt + stage_output_renames: + extensions: "{{ extensions_to_txt }}" + rename_dir: "{{ stage_dir }}/logs" + when: extensions_to_txt is defined and extensions_to_txt is not none - name: Collect log files block: