a8b4bc6ff3
This role uploads logs to IBM Cloud object storage. Change-Id: Ibe1131f863a64051b427fcb03b126b1577c4843a
369 lines
12 KiB
Python
Executable File
369 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Copyright 2014 Rackspace Australia
|
|
# Copyright 2018-2019 Red Hat, Inc
|
|
# Copyright 2021-2022 Acme Gating, LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# Make coding more python3-ish
|
|
from __future__ import (absolute_import, division, print_function)
|
|
__metaclass__ = type
|
|
|
|
|
|
"""
|
|
Utility to upload files to IBM Cloud
|
|
|
|
Run this from the CLI from the zuul-jobs/roles directory with:
|
|
|
|
python -m upload-logs-base.library.zuul_ibm_upload
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
try:
|
|
import queue as queuelib
|
|
except ImportError:
|
|
import Queue as queuelib
|
|
import sys
|
|
import threading
|
|
|
|
from ibm_botocore.client import Config
|
|
import ibm_boto3
|
|
import ibm_boto3.s3.transfer
|
|
|
|
from ansible.module_utils.basic import AnsibleModule
|
|
|
|
try:
|
|
# Ansible context
|
|
from ansible.module_utils.zuul_jobs.upload_utils import (
|
|
FileList,
|
|
GZIPCompressedStream,
|
|
Indexer,
|
|
retry_function,
|
|
)
|
|
except ImportError:
|
|
# Test context
|
|
from ..module_utils.zuul_jobs.upload_utils import (
|
|
FileList,
|
|
GZIPCompressedStream,
|
|
Indexer,
|
|
retry_function,
|
|
)
|
|
|
|
# Upper bound on the number of worker threads Uploader.upload() starts;
# fewer are started when the file list is shorter than this.
MAX_UPLOAD_THREADS = 24
|
|
|
|
|
|
class Uploader():
    """Upload a prepared file list to an IBM Cloud Object Storage bucket.

    Unless ``dry_run`` is set, constructing an instance applies a CORS
    policy (``GET``/``HEAD`` from any origin) to the bucket, creating the
    bucket first if the service reports that it does not exist.
    """

    def __init__(self, client, bucket, prefix=None, public=True,
                 dry_run=False, endpoint_url=None,
                 bucket_location=None):
        """Prepare the target bucket and compute the base URL.

        :param client: S3-style client object used for all bucket and
            upload calls.
        :param bucket: Name of the bucket to upload into.
        :param prefix: Path prepended to every object name; ``None`` is
            treated as the empty string.
        :param public: When true, a newly created bucket and every
            uploaded object get the ``public-read`` ACL.
        :param dry_run: When true, no calls are made to the service and
            ``url`` is set to a fixed example value.
        :param endpoint_url: Service endpoint used as the base of ``url``.
        :param bucket_location: Location constraint used if the bucket
            has to be created.
        :raises Exception: If the bucket does not exist and no
            ``bucket_location`` was given.
        """
        self.dry_run = dry_run
        self.public = public
        # Assigned before the dry-run early return so that every instance
        # carries the full attribute set, whichever mode it is in.
        self.client = client
        self.prefix = prefix or ''
        self.bucket = bucket

        if dry_run:
            self.url = 'https://example.com/a/path/'
            return

        self.url = os.path.join(endpoint_url,
                                bucket, self.prefix)

        try:
            self._set_cors(bucket)
        except self.client.exceptions.NoSuchBucket:
            # The bucket is missing: create it, then apply CORS again.
            if not bucket_location:
                raise Exception("Bucket location must be specified")
            if public:
                acl = 'public-read'
            else:
                acl = 'private'
            self.client.create_bucket(
                ACL=acl,
                Bucket=bucket,
                CreateBucketConfiguration={
                    'LocationConstraint': bucket_location
                }
            )
            self._set_cors(bucket)

    def _set_cors(self, bucket):
        """Allow ``GET`` and ``HEAD`` requests on *bucket* from any origin."""
        self.client.put_bucket_cors(
            Bucket=bucket,
            CORSConfiguration={
                'CORSRules': [{
                    'AllowedMethods': [
                        'GET',
                        'HEAD',
                    ],
                    'AllowedOrigins': [
                        '*',
                    ],
                }],
            },
        )

    def upload(self, file_list):
        """Spin up thread pool to upload to storage"""

        if self.dry_run:
            return

        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
        threads = []
        queue = queuelib.Queue()
        # add items to queue
        for f in file_list:
            queue.put(f)

        for _ in range(num_threads):
            t = threading.Thread(target=self.post_thread, args=(queue,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

    def post_thread(self, queue):
        """Worker loop: upload queued files until the queue is empty.

        An ``IOError`` raised while handling one file is logged and the
        worker moves on to the next file.
        """
        while True:
            try:
                file_detail = queue.get_nowait()
                logging.debug("%s: processing job %s",
                              threading.current_thread(),
                              file_detail)
                retry_function(lambda: self._post_file(file_detail))
            except IOError:
                # Do our best to attempt to upload all the files
                logging.exception("Error opening file")
                continue
            except queuelib.Empty:
                # No more work to do
                return

    @staticmethod
    def _is_text_type(mimetype):
        """Return True if content of *mimetype* should be gzip-compressed."""
        # We want to compress all text types.
        if mimetype.startswith('text/'):
            return True

        # Further compress types that typically contain text but are no
        # text sub type.
        compress_types = [
            'application/json',
            'image/svg+xml',
        ]
        if mimetype in compress_types:
            return True
        return False

    def _post_file(self, file_detail):
        """Upload a single file described by *file_detail*.

        Folders are skipped. Files with no encoding and a text-like
        mimetype are compressed on the fly and sent with
        ``ContentEncoding: gzip``. The source file is always closed once
        the upload call returns or raises.
        """
        if file_detail.folder:
            # We don't need to upload folders to IBM
            return

        relative_path = os.path.join(self.prefix, file_detail.relative_path)
        content_encoding = None

        compress = (file_detail.encoding is None and
                    self._is_text_type(file_detail.mimetype))
        if compress:
            content_encoding = 'gzip'
        elif (not file_detail.filename.endswith(".gz") and
                file_detail.encoding):
            # Don't apply gzip encoding to files that we receive as
            # already gzipped. The reason for this is storage will
            # serve this back to users as an uncompressed file if they
            # don't set an accept-encoding that includes gzip. This
            # can cause problems when the desired file state is
            # compressed as with .tar.gz tarballs.
            content_encoding = file_detail.encoding

        extra_args = dict(
            ContentType=file_detail.mimetype,
        )
        if content_encoding:
            extra_args['ContentEncoding'] = content_encoding

        if self.public:
            extra_args['ACL'] = 'public-read'

        # The context manager closes the file handle even when the upload
        # raises; otherwise every file (and every retry) leaks a handle.
        with open(file_detail.full_path, 'rb') as source:
            data = GZIPCompressedStream(source) if compress else source
            self.client.upload_fileobj(
                data,
                self.bucket,
                relative_path,
                ExtraArgs=extra_args
            )
|
|
|
|
|
|
def run(bucket, files,
        indexes=True, parent_links=True, topdir_parent_link=False,
        partition=False, footer='index_footer.html',
        prefix=None, public=True, dry_run=False, api_key=None,
        instance_id=None, endpoint_url=None, bucket_location=None):
    """Collect *files*, optionally index them, and upload them to *bucket*.

    Leading slashes are stripped from ``prefix``. With ``partition`` set
    and a prefix containing a ``/``, the first path component is appended
    to the bucket name (joined with ``_``) and removed from the prefix.

    :returns: The ``url`` attribute of the ``Uploader`` that was used.
    """
    oauth_config = Config(signature_version="oauth")
    client = ibm_boto3.client(
        "s3",
        ibm_api_key_id=api_key,
        ibm_service_instance_id=instance_id,
        config=oauth_config,
        endpoint_url=endpoint_url,
    )

    if prefix:
        prefix = prefix.lstrip('/')
    if partition and prefix:
        first_part, slash, remainder = prefix.partition('/')
        # Only split when there is more than one path component.
        if slash:
            bucket = bucket + '_' + first_part
            prefix = remainder

    # Build every helper object up front so bad arguments fail early.
    with FileList() as file_list:
        # Gather the files to send.
        for path in files:
            file_list.add(path)

        indexer = Indexer(file_list)

        # Index generation is optional.
        if indexes:
            indexer.make_indexes(
                create_parent_links=parent_links,
                create_topdir_parent_link=topdir_parent_link,
                append_footer=footer,
            )

        logging.debug("List of files prepared to upload:")
        for entry in file_list:
            logging.debug(entry)

        # Send everything to storage.
        uploader = Uploader(
            client,
            bucket,
            prefix=prefix,
            public=public,
            dry_run=dry_run,
            endpoint_url=endpoint_url,
            bucket_location=bucket_location,
        )
        uploader.upload(file_list)
        return uploader.url
|
|
|
|
|
|
def ansible_main():
    """Entry point when executed as an Ansible module.

    Declares the module arguments, forwards them to ``run`` and reports
    the resulting URL through ``exit_json`` with ``changed=True``.
    """
    argument_spec = {
        'bucket': {'required': True, 'type': 'str'},
        'files': {'required': True, 'type': 'list'},
        'partition': {'type': 'bool', 'default': False},
        'indexes': {'type': 'bool', 'default': True},
        'parent_links': {'type': 'bool', 'default': True},
        'topdir_parent_link': {'type': 'bool', 'default': False},
        'public': {'type': 'bool', 'default': True},
        'footer': {'type': 'str'},
        'prefix': {'type': 'str'},
        'api_key': {'type': 'str'},
        'instance_id': {'type': 'str'},
        'endpoint_url': {'type': 'str'},
        'bucket_location': {'type': 'str'},
    }
    module = AnsibleModule(argument_spec=argument_spec)
    params = module.params

    # Module parameters passed to run() as keyword arguments of the same
    # name; bucket and files are passed positionally below.
    forwarded = (
        'indexes',
        'parent_links',
        'topdir_parent_link',
        'partition',
        'footer',
        'prefix',
        'public',
        'api_key',
        'instance_id',
        'endpoint_url',
        'bucket_location',
    )
    options = {name: params.get(name) for name in forwarded}

    url = run(params.get('bucket'), params.get('files'), **options)
    module.exit_json(changed=True, url=url)
|
|
|
|
|
|
def cli_main():
    """Entry point for command-line use.

    Parses ``sys.argv``, enables debug logging when ``--verbose`` is
    given, calls ``run`` and prints the URL it returns.
    """
    parser = argparse.ArgumentParser(
        description="Upload files to IBM Cloud Storage")

    # (name, add_argument keywords), registered in this order.
    flag = 'store_true'
    arguments = [
        ('--verbose', dict(
            action=flag,
            help='show debug information')),
        ('--no-indexes', dict(
            action=flag,
            help='do not generate any indexes at all')),
        ('--no-parent-links', dict(
            action=flag,
            help='do not include links back to a parent dir')),
        ('--create-topdir-parent-link', dict(
            action=flag,
            help='include a link in the root directory of the '
                 'files to the parent directory which may be the '
                 'index of all results')),
        ('--no-public', dict(
            action=flag,
            help='do not create the bucket as public')),
        ('--partition', dict(
            action=flag,
            help='partition the prefix into multiple buckets')),
        ('--append-footer', dict(
            default='index_footer.html',
            help='when generating an index, if the given file is '
                 'present in a directory, append it to the index '
                 '(set to "none" to disable)')),
        ('--prefix', dict(
            help='Prepend this path to the object names when '
                 'uploading')),
        ('--dry-run', dict(
            action=flag,
            help='do not attempt to create buckets or upload, '
                 'useful with --verbose for debugging')),
        ('--api-key', dict(
            help='An IBM Cloud API key')),
        ('--instance-id', dict(
            help='An IBM Cloud Object Storage instance ID')),
        ('--endpoint-url', dict(
            help='An IBM Cloud Object Storage endpoint URL')),
        ('--bucket-location', dict(
            help='The location constraint for the bucket')),
        ('bucket', dict(
            help='Name of the bucket to use when uploading')),
        ('files', dict(
            nargs='+',
            help='the file(s) to upload with recursive glob '
                 'matching when supplied as a string')),
    ]
    for name, settings in arguments:
        parser.add_argument(name, **settings)

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.captureWarnings(True)

    # The literal word "none" (any case) turns the footer off.
    if args.append_footer.lower() == 'none':
        footer = None
    else:
        footer = args.append_footer

    url = run(
        args.bucket,
        args.files,
        indexes=not args.no_indexes,
        parent_links=not args.no_parent_links,
        topdir_parent_link=args.create_topdir_parent_link,
        partition=args.partition,
        footer=footer,
        prefix=args.prefix,
        public=not args.no_public,
        dry_run=args.dry_run,
        api_key=args.api_key,
        instance_id=args.instance_id,
        endpoint_url=args.endpoint_url,
        bucket_location=args.bucket_location,
    )
    print(url)
|
|
|
|
|
|
if __name__ == '__main__':
    # With a terminal on stdin, run as a command-line tool; otherwise
    # run as an Ansible module.
    entry_point = cli_main if sys.stdin.isatty() else ansible_main
    entry_point()
|