diff --git a/doc/source/log-roles.rst b/doc/source/log-roles.rst
index 908da080c..de7782b22 100644
--- a/doc/source/log-roles.rst
+++ b/doc/source/log-roles.rst
@@ -13,6 +13,7 @@ Log Roles
 .. zuul:autorole:: publish-artifacts-to-fileserver
 .. zuul:autorole:: set-zuul-log-path-fact
 .. zuul:autorole:: upload-logs
+.. zuul:autorole:: upload-logs-azure
 .. zuul:autorole:: upload-logs-gcs
 .. zuul:autorole:: upload-logs-s3
 .. zuul:autorole:: upload-logs-swift
diff --git a/roles/upload-logs-azure/README.rst b/roles/upload-logs-azure/README.rst
new file mode 100644
index 000000000..492b7fc32
--- /dev/null
+++ b/roles/upload-logs-azure/README.rst
@@ -0,0 +1,64 @@
+Upload logs to Azure Storage
+
+Before using this role, create a storage account in Azure and obtain
+an `Access key` for the account. You may create a container within
+the account, or allow this role to create the container (or
+containers) for you.
+
+**Role Variables**
+
+.. zuul:rolevar:: zuul_site_upload_logs
+   :default: true
+
+   Controls when logs are uploaded. true, the default, means always
+   upload logs. false means never upload logs. 'failure' means to
+   upload logs only when the job has failed.
+
+   .. note:: Intended to be set by admins via site-variables.
+
+.. zuul:rolevar:: zuul_log_partition
+   :default: false
+
+   If set to true, the first component of the log path will be
+   removed from the object name and added to the container name, so
+   that logs for different changes are distributed across a large
+   number of containers.
+
+.. zuul:rolevar:: zuul_log_container
+
+   If partitioning is not enabled, this is the name of the container
+   which will be used. If partitioning is enabled, this will be used
+   as the prefix for the container name, separated from the
+   partition name by an underscore. For example, "logs_42" would be
+   the container name for partition 42.
+
+.. zuul:rolevar:: zuul_log_container_public
+   :default: true
+
+   If the container is created, whether to create it with a global read
+   ACL. If the container already exists, it will not be modified.
+
+.. zuul:rolevar:: zuul_log_path
+   :default: Generated by the role `set-zuul-log-path-fact`
+
+   Prepend this path to the object names when uploading.
+
+.. zuul:rolevar:: zuul_log_create_indexes
+   :default: true
+
+   Whether to create `index.html` files with directory indexes.
+
+.. zuul:rolevar:: zuul_log_path_shard_build
+   :default: false
+
+   This variable is consumed by the `set-zuul-log-path-fact` role,
+   which upload-logs-azure calls. If set to true, log paths will be
+   prefixed with the first three characters of the build UUID,
+   improving log file sharding.
+
+   More details can be found at
+   :zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`.
+
+.. zuul:rolevar:: zuul_log_connection_string
+
+   The `Access key` connection string for the Azure storage account.
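The container naming described for `zuul_log_partition` and
`zuul_log_container` above is implemented by the prefix splitting in
`run()` of `zuul_azure_upload.py` later in this change. A minimal
standalone sketch of that logic; the container name and log path below
are made-up examples (real paths come from `set-zuul-log-path-fact`,
whose change-based paths start with the last two digits of the change
number):

    def split_partition(container, prefix, partition):
        """Return (container, prefix) after optional partition splitting."""
        prefix = prefix.lstrip('/')
        if partition and prefix:
            parts = prefix.split('/')
            if len(parts) > 1:
                # The first path component moves into the container name...
                container += '_' + parts[0]
                # ...and is dropped from the object prefix.
                prefix = '/'.join(parts[1:])
        return container, prefix

    # Change 190242 partitions into the "logs_42" container:
    print(split_partition('logs', '42/190242/3/check/tox/f8043f6/', True))
    # -> ('logs_42', '190242/3/check/tox/f8043f6/')

This spreads logs for different changes across many containers instead of
accumulating every build's objects in one.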
diff --git a/roles/upload-logs-azure/defaults/main.yaml b/roles/upload-logs-azure/defaults/main.yaml
new file mode 100644
index 000000000..4d7c551b0
--- /dev/null
+++ b/roles/upload-logs-azure/defaults/main.yaml
@@ -0,0 +1,3 @@
+zuul_log_partition: false
+zuul_log_container_public: true
+zuul_log_create_indexes: true
diff --git a/roles/upload-logs-azure/meta/main.yaml b/roles/upload-logs-azure/meta/main.yaml
new file mode 100644
index 000000000..3a4055855
--- /dev/null
+++ b/roles/upload-logs-azure/meta/main.yaml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: upload-logs-base
diff --git a/roles/upload-logs-azure/tasks/main.yaml b/roles/upload-logs-azure/tasks/main.yaml
new file mode 100644
index 000000000..59c0b1183
--- /dev/null
+++ b/roles/upload-logs-azure/tasks/main.yaml
@@ -0,0 +1,39 @@
+- name: Set zuul-log-path fact
+  include_role:
+    name: set-zuul-log-path-fact
+  when: zuul_log_path is not defined
+
+# Always upload (true), never upload (false) or only on failure ('failure')
+- when: zuul_site_upload_logs | default(true) | bool or
+        (zuul_site_upload_logs == 'failure' and not zuul_success | bool)
+  block:
+    # Use chmod instead of file because ansible 2.5 file with recurse and
+    # follow can't really handle symlinks to .
+    - name: Ensure logs are readable before uploading
+      delegate_to: localhost
+      command: "chmod -R u=rwX,g=rX,o=rX {{ zuul.executor.log_root }}/"
+      # ANSIBLE0007 chmod used in place of argument mode to file
+      tags:
+        - skip_ansible_lint
+
+    - name: Upload logs to Azure
+      delegate_to: localhost
+      no_log: true
+      zuul_azure_upload:
+        partition: "{{ zuul_log_partition }}"
+        container: "{{ zuul_log_container }}"
+        public: "{{ zuul_log_container_public }}"
+        prefix: "{{ zuul_log_path }}"
+        indexes: "{{ zuul_log_create_indexes }}"
+        connection_string: "{{ zuul_log_connection_string }}"
+        files:
+          - "{{ zuul.executor.log_root }}/"
+      register: upload_results
+
+- name: Return log URL to Zuul
+  delegate_to: localhost
+  zuul_return:
+    data:
+      zuul:
+        log_url: "{{ upload_results.url }}/"
+  when: upload_results is defined
diff --git a/roles/upload-logs-base/library/test_zuul_azure_upload.py b/roles/upload-logs-base/library/test_zuul_azure_upload.py
new file mode 100644
index 000000000..3ffc443b8
--- /dev/null
+++ b/roles/upload-logs-base/library/test_zuul_azure_upload.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2018-2019 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
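+
+# These tests replace BlobServiceClient with a mock, so no Azure account
+# or network access is needed; the FileDetail entries used below point at
+# sample logs shipped under test-fixtures/.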
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import os
+import testtools
+try:
+    from unittest import mock
+except ImportError:
+    import mock
+
+from .zuul_azure_upload import Uploader
+from ..module_utils.zuul_jobs.upload_utils import FileDetail
+
+
+FIXTURE_DIR = os.path.join(os.path.dirname(__file__),
+                           'test-fixtures')
+
+
+class TestUpload(testtools.TestCase):
+
+    def test_upload_result(self):
+        client = mock.Mock()
+        # Give the mocked container client a concrete URL so that the
+        # Uploader can build its result URL from it.
+        client.create_container.return_value.url = (
+            'https://example.blob.core.windows.net/container')
+        uploader = Uploader(client=client, container="container")
+
+        # Get some test files to upload
+        files = [
+            FileDetail(
+                os.path.join(FIXTURE_DIR, "logs/job-output.json"),
+                "job-output.json",
+            ),
+            FileDetail(
+                os.path.join(FIXTURE_DIR, "logs/zuul-info/inventory.yaml"),
+                "inventory.yaml",
+            ),
+        ]
+
+        uploader.upload(files)
+        client.create_container.assert_called_with(
+            'container', public_access='container')
+
+        upload_calls = uploader.client.get_blob_client.mock_calls
+        self.assertIn(
+            mock.call(container='container', blob='job-output.json'),
+            upload_calls)
+        self.assertIn(
+            mock.call(container='container', blob='inventory.yaml'),
+            upload_calls)
diff --git a/roles/upload-logs-base/library/zuul_azure_upload.py b/roles/upload-logs-base/library/zuul_azure_upload.py
new file mode 100644
index 000000000..61defaf24
--- /dev/null
+++ b/roles/upload-logs-base/library/zuul_azure_upload.py
@@ -0,0 +1,317 @@
+#!/usr/bin/env python3
+#
+# Copyright 2014 Rackspace Australia
+# Copyright 2018-2019 Red Hat, Inc
+# Copyright 2021 Acme Gating, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+
+"""
+Utility to upload files to Azure
+
+Run this from the CLI from the zuul-jobs/roles directory with:
+
+    python -m upload-logs-base.library.zuul_azure_upload
+"""
+
+import argparse
+import logging
+import os
+try:
+    import queue as queuelib
+except ImportError:
+    import Queue as queuelib
+import sys
+import threading
+
+from azure.storage.blob import (
+    BlobServiceClient,
+    ContentSettings,
+    CorsRule,
+)
+from azure.core.exceptions import ResourceExistsError
+
+from ansible.module_utils.basic import AnsibleModule
+
+try:
+    # Ansible context
+    from ansible.module_utils.zuul_jobs.upload_utils import (
+        FileList,
+        GZIPCompressedStream,
+        Indexer,
+        retry_function,
+    )
+except ImportError:
+    # Test context
+    from ..module_utils.zuul_jobs.upload_utils import (
+        FileList,
+        GZIPCompressedStream,
+        Indexer,
+        retry_function,
+    )
+
+MAX_UPLOAD_THREADS = 24
+
+
+class Uploader():
+    def __init__(self, client, container, prefix=None,
+                 public=True, dry_run=False):
+
+        self.dry_run = dry_run
+        if dry_run:
+            self.url = 'https://example.com/a/path/'
+            return
+
+        self.client = client
+        self.prefix = prefix or ''
+        self.container = container
+
+        if public:
+            public = 'container'
+        else:
+            public = None
+        try:
+            cc = client.create_container(container, public_access=public)
+        except ResourceExistsError:
+            cc = client.get_container_client(container)
+
+        cors_rule = CorsRule(['*'], ['GET', 'HEAD'])
+        cors = [cors_rule]
+
+        client.set_service_properties(cors=cors)
+
+        # Include the prefix so the URL returned to Zuul points at this
+        # build's logs rather than at the container root.
+        self.url = os.path.join(cc.url, self.prefix)
+
+    def upload(self, file_list):
+        """Spin up thread pool to upload to storage"""
+
+        if self.dry_run:
+            return
+
+        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
+        threads = []
+        queue = queuelib.Queue()
+        # add items to queue
+        for f in file_list:
+            queue.put(f)
+
+        for x in range(num_threads):
+            t = threading.Thread(target=self.post_thread, args=(queue,))
+            threads.append(t)
+            t.start()
+
+        for t in threads:
+            t.join()
+
+    def post_thread(self, queue):
+        while True:
+            try:
+                file_detail = queue.get_nowait()
+                logging.debug("%s: processing job %s",
+                              threading.current_thread(),
+                              file_detail)
+                retry_function(lambda: self._post_file(file_detail))
+            except IOError:
+                # Do our best to attempt to upload all the files
+                logging.exception("Error opening file")
+                continue
+            except queuelib.Empty:
+                # No more work to do
+                return
+
+    @staticmethod
+    def _is_text_type(mimetype):
+        # We want to compress all text types.
+        if mimetype.startswith('text/'):
+            return True
+
+        # Further compress types that typically contain text but are not
+        # a text sub type.
+        compress_types = [
+            'application/json',
+            'image/svg+xml',
+        ]
+        if mimetype in compress_types:
+            return True
+        return False
+
+    def _post_file(self, file_detail):
+        relative_path = os.path.join(self.prefix, file_detail.relative_path)
+        content_encoding = None
+
+        if file_detail.folder:
+            # We don't need to upload folders to Azure
+            return
+
+        if (file_detail.encoding is None and
+                self._is_text_type(file_detail.mimetype)):
+            content_encoding = 'gzip'
+            data = GZIPCompressedStream(open(file_detail.full_path, 'rb'))
+        else:
+            if (not file_detail.filename.endswith(".gz") and
+                    file_detail.encoding):
+                # Don't apply gzip encoding to files that we receive as
+                # already gzipped. The reason for this is storage will
+                # serve this back to users as an uncompressed file if they
+                # don't set an accept-encoding that includes gzip.
This + # can cause problems when the desired file state is + # compressed as with .tar.gz tarballs. + content_encoding = file_detail.encoding + data = open(file_detail.full_path, 'rb') + + blob = self.client.get_blob_client(container=self.container, + blob=relative_path) + content_settings = ContentSettings( + content_type=file_detail.mimetype, + content_encoding=content_encoding) + blob.upload_blob(data) + blob.set_http_headers(content_settings) + + +def run(container, files, + indexes=True, parent_links=True, topdir_parent_link=False, + partition=False, footer='index_footer.html', + prefix=None, public=True, dry_run=False, connection_string=None): + + client = BlobServiceClient.from_connection_string(connection_string) + + if prefix: + prefix = prefix.lstrip('/') + if partition and prefix: + parts = prefix.split('/') + if len(parts) > 1: + container += '_' + parts[0] + prefix = '/'.join(parts[1:]) + + # Create the objects to make sure the arguments are sound. + with FileList() as file_list: + # Scan the files. + for file_path in files: + file_list.add(file_path) + + indexer = Indexer(file_list) + + # (Possibly) make indexes. + if indexes: + indexer.make_indexes(create_parent_links=parent_links, + create_topdir_parent_link=topdir_parent_link, + append_footer=footer) + + logging.debug("List of files prepared to upload:") + for x in file_list: + logging.debug(x) + + # Upload. + uploader = Uploader(client, container, prefix, public, dry_run) + uploader.upload(file_list) + return uploader.url + + +def ansible_main(): + module = AnsibleModule( + argument_spec=dict( + container=dict(required=True, type='str'), + files=dict(required=True, type='list'), + partition=dict(type='bool', default=False), + indexes=dict(type='bool', default=True), + parent_links=dict(type='bool', default=True), + topdir_parent_link=dict(type='bool', default=False), + public=dict(type='bool', default=True), + footer=dict(type='str'), + prefix=dict(type='str'), + connection_string=dict(type='str'), + ) + ) + + p = module.params + url = run(p.get('container'), p.get('files'), + indexes=p.get('indexes'), + parent_links=p.get('parent_links'), + topdir_parent_link=p.get('topdir_parent_link'), + partition=p.get('partition'), + footer=p.get('footer'), + prefix=p.get('prefix'), + public=p.get('public'), + connection_string=p.get('connection_string')) + module.exit_json(changed=True, + url=url) + + +def cli_main(): + parser = argparse.ArgumentParser( + description="Upload files to Azure Storage" + ) + parser.add_argument('--verbose', action='store_true', + help='show debug information') + parser.add_argument('--no-indexes', action='store_true', + help='do not generate any indexes at all') + parser.add_argument('--no-parent-links', action='store_true', + help='do not include links back to a parent dir') + parser.add_argument('--create-topdir-parent-link', action='store_true', + help='include a link in the root directory of the ' + 'files to the parent directory which may be the ' + 'index of all results') + parser.add_argument('--no-public', action='store_true', + help='do not create the container as public') + parser.add_argument('--partition', action='store_true', + help='partition the prefix into multiple containers') + parser.add_argument('--append-footer', default='index_footer.html', + help='when generating an index, if the given file is ' + 'present in a directory, append it to the index ' + '(set to "none" to disable)') + parser.add_argument('--prefix', + help='Prepend this path to the object names when ' + 'uploading') + 
+    parser.add_argument('--dry-run', action='store_true',
+                        help='do not attempt to create containers or upload, '
+                             'useful with --verbose for debugging')
+    parser.add_argument('--connection-string',
+                        help='An Azure access key connection string')
+    parser.add_argument('container',
+                        help='Name of the container to use when uploading')
+    parser.add_argument('files', nargs='+',
+                        help='the file(s) to upload with recursive glob '
+                             'matching when supplied as a string')
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.captureWarnings(True)
+
+    append_footer = args.append_footer
+    if append_footer.lower() == 'none':
+        append_footer = None
+
+    url = run(args.container, args.files,
+              indexes=not args.no_indexes,
+              parent_links=not args.no_parent_links,
+              topdir_parent_link=args.create_topdir_parent_link,
+              partition=args.partition,
+              footer=append_footer,
+              prefix=args.prefix,
+              public=not args.no_public,
+              dry_run=args.dry_run,
+              connection_string=args.connection_string)
+    print(url)
+
+
+if __name__ == '__main__':
+    if sys.stdin.isatty():
+        cli_main()
+    else:
+        ansible_main()
diff --git a/test-requirements.txt b/test-requirements.txt
index 35578a462..f01eeb268 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -32,5 +32,8 @@ google-cloud-storage
 # For upload-logs-s3
 boto3
 
+# For upload-logs-azure
+azure-storage-blob
+
 # unittest.mock compatibility package for Python < 3.3
 mock;python_version<'3.3'
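Because of the `sys.stdin.isatty()` switch above, the module doubles as a
CLI, and the dry-run path stops before any Azure call, which allows a
credential-free smoke test. A minimal sketch of driving that path from
Python; it assumes the packages from test-requirements (plus Ansible) are
installed and that it is run from the zuul-jobs/roles directory, as the
module docstring suggests (the hyphenated package import relies on
Python 3 namespace packages):

    # Credential-free smoke test of the dry-run path: Uploader.__init__
    # returns before the client is touched, so client=None is safe here.
    from importlib import import_module

    upload = import_module('upload-logs-base.library.zuul_azure_upload')

    uploader = upload.Uploader(client=None, container='logs', dry_run=True)
    uploader.upload(['anything'])   # a no-op in dry-run mode
    print(uploader.url)             # https://example.com/a/path/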