Add upload-logs-azure role

Add support for uploading logs to Azure blob storage.

Change-Id: I0347977324b880123c6ed83ded3c39eb210612e2
James E. Blair 2021-03-21 13:59:53 -07:00
parent 0eaa5cf59a
commit 120a11ef20
8 changed files with 493 additions and 0 deletions


@ -13,6 +13,7 @@ Log Roles
.. zuul:autorole:: publish-artifacts-to-fileserver
.. zuul:autorole:: set-zuul-log-path-fact
.. zuul:autorole:: upload-logs
.. zuul:autorole:: upload-logs-azure
.. zuul:autorole:: upload-logs-gcs
.. zuul:autorole:: upload-logs-s3
.. zuul:autorole:: upload-logs-swift


@ -0,0 +1,64 @@
Upload logs to Azure Storage

Before using this role, create a storage account in Azure and obtain
an `Access key` for the account. You may create a container within
the account, or allow this role to create the container (or
containers) for you.

**Role Variables**

.. zuul:rolevar:: zuul_site_upload_logs
   :default: true

   Controls when logs are uploaded. true, the default, means always
   upload logs. false means never upload logs. 'failure' means to only
   upload logs when the job has failed.

   .. note:: Intended to be set by admins via site-variables.
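   For example, an operator might pin this centrally in the
   executor's site-variables file. A minimal sketch (the file
   location depends on the deployment):

   .. code-block:: yaml

      # Only upload logs when the job has failed
      zuul_site_upload_logs: failure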
.. zuul:rolevar:: zuul_log_partition
   :default: false

   If set to true, then the first component of the log path will be
   removed from the object name and added to the container name, so
   that logs for different changes are distributed across a large
   number of containers.

.. zuul:rolevar:: zuul_log_container

   If partitioning is not enabled, this is the name of the container
   which will be used. If partitioning is enabled, then this will be
   used as the prefix for the container name, which will be separated
   from the partition name by an underscore. For example, "logs_42"
   would be the container name for partition 42.
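   To make the partitioning concrete, a sketch combining the two
   variables (values are illustrative): with the settings below, a
   log path beginning with `42/` would be uploaded to a container
   named `logs_42`.

   .. code-block:: yaml

      zuul_log_partition: true
      zuul_log_container: logs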
.. zuul:rolevar:: zuul_log_container_public
   :default: true

   If the container is created, should it be created with global read
   ACLs. If the container already exists, it will not be modified.

.. zuul:rolevar:: zuul_log_path
   :default: Generated by the role `set-zuul-log-path-fact`

   Prepend this path to the object names when uploading.

.. zuul:rolevar:: zuul_log_create_indexes
   :default: true

   Whether to create `index.html` files with directory indexes.

.. zuul:rolevar:: zuul_log_path_shard_build
   :default: false

   This var is consumed by set-zuul-log-path-fact which
   upload-logs-azure calls into. If you set this you will get log
   paths prefixed with the first three characters of the build
   uuid. This will improve log file sharding.

   More details can be found at
   :zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`.

.. zuul:rolevar:: zuul_log_connection_string

   The Access key connection string for the Azure storage account.
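As an example of wiring the role into a job, a base job's post
playbook might attach it roughly as follows. This is a sketch: the
`azure_logs` secret name and its `connection_string` field are
assumptions; supply them from whatever Zuul secret your site defines.

.. code-block:: yaml

   - hosts: localhost
     roles:
       - role: upload-logs-azure
         zuul_log_container: logs
         zuul_log_connection_string: "{{ azure_logs.connection_string }}"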


@ -0,0 +1,3 @@
zuul_log_partition: false
zuul_log_container_public: true
zuul_log_create_indexes: true


@ -0,0 +1,2 @@
dependencies:
- role: upload-logs-base


@ -0,0 +1,39 @@
- name: Set zuul-log-path fact
  include_role:
    name: set-zuul-log-path-fact
  when: zuul_log_path is not defined

# Always upload (true), never upload (false) or only on failure ('failure')
- when: zuul_site_upload_logs | default(true) | bool or
        (zuul_site_upload_logs == 'failure' and not zuul_success | bool)
  block:
    # Use chmod instead of file because ansible 2.5 file with recurse and
    # follow can't really handle symlinks to .
    - name: Ensure logs are readable before uploading
      delegate_to: localhost
      command: "chmod -R u=rwX,g=rX,o=rX {{ zuul.executor.log_root }}/"
      # ANSIBLE0007 chmod used in place of argument mode to file
      tags:
        - skip_ansible_lint

    - name: Upload logs to Azure
      delegate_to: localhost
      no_log: true
      zuul_azure_storage_upload:
        partition: "{{ zuul_log_partition }}"
        container: "{{ zuul_log_container }}"
        public: "{{ zuul_log_container_public }}"
        prefix: "{{ zuul_log_path }}"
        indexes: "{{ zuul_log_create_indexes }}"
        connection_string: "{{ zuul_log_connection_string }}"
        files:
          - "{{ zuul.executor.log_root }}/"
      register: upload_results

- name: Return log URL to Zuul
  delegate_to: localhost
  zuul_return:
    data:
      zuul:
        log_url: "{{ upload_results.url }}/"
  when: upload_results is defined
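
# Note (illustrative): the zuul_return above hands the container URL back
# to Zuul so the build page can link to the uploaded logs. The returned
# data takes roughly this shape, for a storage account named
# "exampleaccount" and a container named "logs":
#
#   zuul:
#     log_url: "https://exampleaccount.blob.core.windows.net/logs/"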


@ -0,0 +1,64 @@
# Copyright (C) 2018-2019 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.

# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import os

import testtools

try:
    from unittest import mock
except ImportError:
    import mock
from .zuul_azure_storage_upload import Uploader
from ..module_utils.zuul_jobs.upload_utils import FileDetail

FIXTURE_DIR = os.path.join(os.path.dirname(__file__),
                           'test-fixtures')


class TestUpload(testtools.TestCase):

    def test_upload_result(self):
        client = mock.Mock()
        uploader = Uploader(client=client, container="container")

        # Get some test files to upload
        files = [
            FileDetail(
                os.path.join(FIXTURE_DIR, "logs/job-output.json"),
                "job-output.json",
            ),
            FileDetail(
                os.path.join(FIXTURE_DIR, "logs/zuul-info/inventory.yaml"),
                "inventory.yaml",
            ),
        ]

        uploader.upload(files)
        client.create_container.assert_called_with(
            'container', public_access='container')
        upload_calls = uploader.client.get_blob_client.mock_calls
        self.assertIn(
            mock.call(container='container', blob='job-output.json'),
            upload_calls)
        self.assertIn(
            mock.call(container='container', blob='inventory.yaml'),
            upload_calls)


@ -0,0 +1,317 @@
#!/usr/bin/env python3
#
# Copyright 2014 Rackspace Australia
# Copyright 2018-2019 Red Hat, Inc
# Copyright 2021 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

"""
Utility to upload files to Azure

Run this from the CLI from the zuul-jobs/roles directory with:

    python -m upload-logs-base.library.zuul_azure_storage_upload
"""

import argparse
import logging
import os
try:
    import queue as queuelib
except ImportError:
    import Queue as queuelib
import sys
import threading

from azure.storage.blob import (
    BlobServiceClient, CorsRule, ContentSettings
)
from azure.core.exceptions import ResourceExistsError

from ansible.module_utils.basic import AnsibleModule
try:
    # Ansible context
    from ansible.module_utils.zuul_jobs.upload_utils import (
        FileList,
        GZIPCompressedStream,
        Indexer,
        retry_function,
    )
except ImportError:
    # Test context
    from ..module_utils.zuul_jobs.upload_utils import (
        FileList,
        GZIPCompressedStream,
        Indexer,
        retry_function,
    )

MAX_UPLOAD_THREADS = 24
class Uploader():
    def __init__(self, client, container, prefix=None,
                 public=True, dry_run=False):
        self.dry_run = dry_run
        if dry_run:
            self.url = 'https://example.com/a/path/'
            return

        self.client = client
        self.prefix = prefix or ''
        self.container = container

        if public:
            public = 'container'
        else:
            public = None
        try:
            cc = client.create_container(container, public_access=public)
        except ResourceExistsError:
            cc = client.get_container_client(container)

        cors_rule = CorsRule(['*'], ['GET', 'HEAD'])
        cors = [cors_rule]
        client.set_service_properties(cors=cors)

        self.url = cc.url

    def upload(self, file_list):
        """Spin up thread pool to upload to storage"""
        if self.dry_run:
            return

        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
        threads = []
        queue = queuelib.Queue()
        # add items to queue
        for f in file_list:
            queue.put(f)

        for x in range(num_threads):
            t = threading.Thread(target=self.post_thread, args=(queue,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

    def post_thread(self, queue):
        while True:
            try:
                file_detail = queue.get_nowait()
                logging.debug("%s: processing job %s",
                              threading.current_thread(),
                              file_detail)
                retry_function(lambda: self._post_file(file_detail))
            except IOError:
                # Do our best to attempt to upload all the files
                logging.exception("Error opening file")
                continue
            except queuelib.Empty:
                # No more work to do
                return

    @staticmethod
    def _is_text_type(mimetype):
        # We want to compress all text types.
        if mimetype.startswith('text/'):
            return True

        # Further compress types that typically contain text but are
        # not a text subtype.
        compress_types = [
            'application/json',
            'image/svg+xml',
        ]
        if mimetype in compress_types:
            return True
        return False

    def _post_file(self, file_detail):
        relative_path = os.path.join(self.prefix, file_detail.relative_path)
        content_encoding = None

        if file_detail.folder:
            # We don't need to upload folders to Azure
            return

        if (file_detail.encoding is None and
                self._is_text_type(file_detail.mimetype)):
            content_encoding = 'gzip'
            data = GZIPCompressedStream(open(file_detail.full_path, 'rb'))
        else:
            if (not file_detail.filename.endswith(".gz") and
                    file_detail.encoding):
                # Don't apply gzip encoding to files that we receive as
                # already gzipped. The reason for this is storage will
                # serve this back to users as an uncompressed file if they
                # don't set an accept-encoding that includes gzip. This
                # can cause problems when the desired file state is
                # compressed as with .tar.gz tarballs.
                content_encoding = file_detail.encoding
            data = open(file_detail.full_path, 'rb')

        blob = self.client.get_blob_client(container=self.container,
                                           blob=relative_path)
        content_settings = ContentSettings(
            content_type=file_detail.mimetype,
            content_encoding=content_encoding)
        blob.upload_blob(data)
        blob.set_http_headers(content_settings)
def run(container, files,
        indexes=True, parent_links=True, topdir_parent_link=False,
        partition=False, footer='index_footer.html',
        prefix=None, public=True, dry_run=False, connection_string=None):

    client = BlobServiceClient.from_connection_string(connection_string)

    if prefix:
        prefix = prefix.lstrip('/')
    if partition and prefix:
        parts = prefix.split('/')
        if len(parts) > 1:
            container += '_' + parts[0]
            prefix = '/'.join(parts[1:])

    # Create the objects to make sure the arguments are sound.
    with FileList() as file_list:
        # Scan the files.
        for file_path in files:
            file_list.add(file_path)

        indexer = Indexer(file_list)

        # (Possibly) make indexes.
        if indexes:
            indexer.make_indexes(create_parent_links=parent_links,
                                 create_topdir_parent_link=topdir_parent_link,
                                 append_footer=footer)

        logging.debug("List of files prepared to upload:")
        for x in file_list:
            logging.debug(x)

        # Upload.
        uploader = Uploader(client, container, prefix, public, dry_run)
        uploader.upload(file_list)
        return uploader.url
def ansible_main():
    module = AnsibleModule(
        argument_spec=dict(
            container=dict(required=True, type='str'),
            files=dict(required=True, type='list'),
            partition=dict(type='bool', default=False),
            indexes=dict(type='bool', default=True),
            parent_links=dict(type='bool', default=True),
            topdir_parent_link=dict(type='bool', default=False),
            public=dict(type='bool', default=True),
            footer=dict(type='str'),
            prefix=dict(type='str'),
            connection_string=dict(type='str'),
        )
    )

    p = module.params
    url = run(p.get('container'), p.get('files'),
              indexes=p.get('indexes'),
              parent_links=p.get('parent_links'),
              topdir_parent_link=p.get('topdir_parent_link'),
              partition=p.get('partition'),
              footer=p.get('footer'),
              prefix=p.get('prefix'),
              public=p.get('public'),
              connection_string=p.get('connection_string'))

    module.exit_json(changed=True,
                     url=url)
def cli_main():
    parser = argparse.ArgumentParser(
        description="Upload files to Azure Storage"
    )
    parser.add_argument('--verbose', action='store_true',
                        help='show debug information')
    parser.add_argument('--no-indexes', action='store_true',
                        help='do not generate any indexes at all')
    parser.add_argument('--no-parent-links', action='store_true',
                        help='do not include links back to a parent dir')
    parser.add_argument('--create-topdir-parent-link', action='store_true',
                        help='include a link in the root directory of the '
                             'files to the parent directory which may be the '
                             'index of all results')
    parser.add_argument('--no-public', action='store_true',
                        help='do not create the container as public')
    parser.add_argument('--partition', action='store_true',
                        help='partition the prefix into multiple containers')
    parser.add_argument('--append-footer', default='index_footer.html',
                        help='when generating an index, if the given file is '
                             'present in a directory, append it to the index '
                             '(set to "none" to disable)')
    parser.add_argument('--prefix',
                        help='Prepend this path to the object names when '
                             'uploading')
    parser.add_argument('--dry-run', action='store_true',
                        help='do not attempt to create containers or upload, '
                             'useful with --verbose for debugging')
    parser.add_argument('--connection-string',
                        help='An Azure access key connection string')
    parser.add_argument('container',
                        help='Name of the container to use when uploading')
    parser.add_argument('files', nargs='+',
                        help='the file(s) to upload with recursive glob '
                             'matching when supplied as a string')
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.captureWarnings(True)

    append_footer = args.append_footer
    if append_footer.lower() == 'none':
        append_footer = None

    url = run(args.container, args.files,
              indexes=not args.no_indexes,
              parent_links=not args.no_parent_links,
              topdir_parent_link=args.create_topdir_parent_link,
              partition=args.partition,
              footer=append_footer,
              prefix=args.prefix,
              public=not args.no_public,
              dry_run=args.dry_run,
              connection_string=args.connection_string)
    print(url)


if __name__ == '__main__':
    if sys.stdin.isatty():
        cli_main()
    else:
        ansible_main()


@ -32,5 +32,8 @@ google-cloud-storage
# For upload-logs-s3
boto3
# For upload-logs-azure
azure-storage-blob
# unittest.mock compatibility package for Python < 3.3
mock;python_version<'3.3'