Add upload-logs-gcs role

This uploads logs to Google Cloud Storage.  It is based on
upload-logs-swift.

Change-Id: I4d83c3f4eb7f186271302cbb6ebae6b1f280785b
This commit is contained in:
James E. Blair 2020-01-21 16:04:02 -08:00
parent 7a1684fffb
commit b8ce30f3cb
24 changed files with 1377 additions and 0 deletions

View File

@ -11,4 +11,5 @@ Log Roles
.. zuul:autorole:: publish-artifacts-to-fileserver
.. zuul:autorole:: set-zuul-log-path-fact
.. zuul:autorole:: upload-logs
.. zuul:autorole:: upload-logs-gcs
.. zuul:autorole:: upload-logs-swift

View File

@ -0,0 +1,63 @@
Upload logs to Google Cloud Storage
Before using this role, create at least one bucket and set up
appropriate access controls or lifecycle events. This role will not
automatically create buckets (though it will configure CORS policies).
This role requires the ``google-cloud-storage`` Python package to be
installed in the Ansible environment on the Zuul executor. It uses
Google Cloud Application Default Credentials.
**Role Variables**
.. zuul:rolevar:: zuul_site_upload_logs
:default: true
Controls when logs are uploaded. true, the default, means always
upload logs. false means never upload logs. 'failure' means to only
upload logs when the job has failed.
.. note:: Intended to be set by admins via site-variables.
.. zuul:rolevar:: zuul_log_partition
:default: false
If set to true, then the first component of the log path will be
removed from the object name and added to the bucket name, so that
logs for different changes are distributed across a large number of
buckets.
.. zuul:rolevar:: zuul_log_container
This role *will not* create buckets which do not already exist. If
partitioning is not enabled, this is the name of the bucket which
will be used. If partitioning is enabled, then this will be used
as the prefix for the bucket name which will be separated from the
partition name by an underscore. For example, "logs_42" would be
the bucket name for partition 42.
Note that you will want to set this to a value that uniquely
identifies your Zuul installation.
.. zuul:rolevar:: zuul_log_path
:default: Generated by the role `set-zuul-log-path-fact`
Prepend this path to the object names when uploading.
.. zuul:rolevar:: zuul_log_create_indexes
:default: true
Whether to create `index.html` files with directory indexes. (The
Swift `Web-Listings=true` container property described for
upload-logs-swift is specific to Swift and is not set up by this role.)
.. zuul:rolevar:: zuul_log_path_shard_build
:default: false
This var is consumed by set-zuul-log-path-fact which
upload-logs-gcs calls into. If you set this you will get log paths
prefixed with the first three characters of the build uuid. This
will improve log file sharding.
More details can be found at
:zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`.

View File

View File

@ -0,0 +1,3 @@
zuul_log_partition: false
zuul_log_container_public: true
zuul_log_create_indexes: true

View File

@ -0,0 +1 @@
{"test": "foo"}

View File

@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg>
</svg>

After

Width:  |  Height:  |  Size: 52 B

View File

@ -0,0 +1,2 @@
This is a plan text file with a funny name.
The index links should escape the :'s.

View File

@ -0,0 +1 @@
{"test": "foo"}

View File

@ -0,0 +1,404 @@
# Copyright (C) 2018-2019 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import os
import testtools
import time
import stat
import fixtures
from bs4 import BeautifulSoup
from .zuul_google_storage_upload import FileList, Indexer, FileDetail
FIXTURE_DIR = os.path.join(os.path.dirname(__file__),
'test-fixtures')
class SymlinkFixture(fixtures.Fixture):
    """Create the symlinks used by the symlink tests.

    The links are created inside ``FIXTURE_DIR/links`` when the fixture
    is set up and removed again on cleanup.  Each entry of ``links`` is a
    ``(link name, link target)`` pair.
    """

    links = [
        ('bad_symlink', '/etc'),
        ('bad_symlink_file', '/etc/issue'),
        ('good_symlink', 'controller'),
        ('recursive_symlink', '.'),
        ('symlink_file', 'job-output.json'),
        ('symlink_loop_a', 'symlink_loop'),
        ('symlink_loop/symlink_loop_b', '..'),
    ]

    def _setUp(self):
        """Remove any stale links, create fresh ones, register cleanup."""
        self._cleanup()
        for (src, target) in self.links:
            path = os.path.join(FIXTURE_DIR, 'links', src)
            os.symlink(target, path)
        self.addCleanup(self._cleanup)

    def _cleanup(self):
        """Remove every link listed in ``links`` that is present on disk."""
        for (src, target) in self.links:
            path = os.path.join(FIXTURE_DIR, 'links', src)
            # lexists() tests the link itself.  exists() follows the link
            # and reports False when the target is missing (for example
            # /etc/issue on hosts without that file), which would leave
            # the link behind and make the next os.symlink() call fail.
            if os.path.lexists(path):
                os.unlink(path)
class TestFileList(testtools.TestCase):
    """Tests for FileList directory walking and Indexer index generation.

    Every test runs against the fixture trees below FIXTURE_DIR.
    """

    # Link expected for a parent-directory row, as (href, text).
    PARENT_LINK = ('../', '../')

    # Links expected for the two files in controller/subdir, as
    # (href, text).
    SUBDIR_FILE_LINKS = [
        # Test proper escaping of files with funny names
        ('foo%3A%3A3.txt', 'foo::3.txt'),
        # Test files without escaping
        ('subdir.txt', 'subdir.txt'),
    ]

    # (relative_path, mimetype, encoding) entries expected after
    # indexing FIXTURE_DIR/logs/ (added with a trailing slash).  The
    # parent-link options change the content of the index pages, not
    # the list of files.
    INDEXED_LOGS_DIR = [
        ('', 'application/directory', None),
        ('controller', 'application/directory', None),
        ('zuul-info', 'application/directory', None),
        ('job-output.json', 'application/json', None),
        ('index.html', 'text/html', None),
        ('controller/subdir', 'application/directory', None),
        ('controller/compressed.gz', 'text/plain', 'gzip'),
        ('controller/cpu-load.svg', 'image/svg+xml', None),
        ('controller/journal.xz', 'text/plain', 'xz'),
        ('controller/service_log.txt', 'text/plain', None),
        ('controller/syslog', 'text/plain', None),
        ('controller/index.html', 'text/html', None),
        ('controller/subdir/foo::3.txt', 'text/plain', None),
        ('controller/subdir/subdir.txt', 'text/plain', None),
        ('controller/subdir/index.html', 'text/html', None),
        ('zuul-info/inventory.yaml', 'text/plain', None),
        ('zuul-info/zuul-info.controller.txt', 'text/plain', None),
        ('zuul-info/index.html', 'text/html', None),
    ]

    def assert_files(self, result, files):
        """Assert that ``result`` matches the expected ``files``.

        Args:
            result: The sequence of FileDetail objects under test.
            files: Expected ``(relative_path, mimetype, encoding)``
                tuples, in order.
        """
        self.assertEqual(len(result), len(files))
        for expected, received in zip(files, result):
            self.assertEqual(expected[0], received.relative_path)
            if expected[0] and expected[0][-1] == '/':
                efilename = os.path.split(
                    os.path.dirname(expected[0]))[1] + '/'
            else:
                efilename = os.path.split(expected[0])[1]
            self.assertEqual(efilename, received.filename)
            if received.folder:
                if received.full_path is not None and expected[0] != '':
                    self.assertTrue(os.path.isdir(received.full_path))
            else:
                self.assertTrue(os.path.isfile(received.full_path))
            self.assertEqual(expected[1], received.mimetype)
            self.assertEqual(expected[2], received.encoding)

    def find_file(self, file_list, path):
        """Return the entry whose relative_path equals ``path``, or None."""
        for f in file_list:
            if f.relative_path == path:
                return f
        return None

    def _index_rows(self, file_list, path):
        """Parse the index at ``path`` and return its rows minus the header.

        The file is read inside a ``with`` block so the handle is closed
        straight away instead of being left to the garbage collector.
        """
        index = self.find_file(file_list, path)
        self.assertIsNotNone(index, 'no %s in the file list' % path)
        with open(index.full_path) as f:
            page = BeautifulSoup(f.read(), 'html.parser')
        return page.find_all('tr')[1:]

    def _assert_links(self, rows, expected):
        """Assert that the leading ``rows`` link to ``expected``.

        Args:
            rows: Table rows as returned by _index_rows().
            expected: ``(href, text)`` pairs for rows[0], rows[1], ...
        """
        for position, (href, text) in enumerate(expected):
            link = rows[position].find('a')
            self.assertEqual(link.get('href'), href)
            self.assertEqual(link.text, text)

    def test_single_dir_trailing_slash(self):
        '''Test a single directory with a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('controller', 'application/directory', None),
                ('zuul-info', 'application/directory', None),
                ('job-output.json', 'application/json', None),
                ('controller/subdir', 'application/directory', None),
                ('controller/compressed.gz', 'text/plain', 'gzip'),
                ('controller/cpu-load.svg', 'image/svg+xml', None),
                ('controller/journal.xz', 'text/plain', 'xz'),
                ('controller/service_log.txt', 'text/plain', None),
                ('controller/syslog', 'text/plain', None),
                ('controller/subdir/foo::3.txt', 'text/plain', None),
                ('controller/subdir/subdir.txt', 'text/plain', None),
                ('zuul-info/inventory.yaml', 'text/plain', None),
                ('zuul-info/zuul-info.controller.txt', 'text/plain', None),
            ])

    def test_single_dir(self):
        '''Test a single directory without a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('logs', 'application/directory', None),
                ('logs/controller', 'application/directory', None),
                ('logs/zuul-info', 'application/directory', None),
                ('logs/job-output.json', 'application/json', None),
                ('logs/controller/subdir', 'application/directory', None),
                ('logs/controller/compressed.gz', 'text/plain', 'gzip'),
                ('logs/controller/cpu-load.svg', 'image/svg+xml', None),
                ('logs/controller/journal.xz', 'text/plain', 'xz'),
                ('logs/controller/service_log.txt', 'text/plain', None),
                ('logs/controller/syslog', 'text/plain', None),
                ('logs/controller/subdir/foo::3.txt', 'text/plain', None),
                ('logs/controller/subdir/subdir.txt', 'text/plain', None),
                ('logs/zuul-info/inventory.yaml', 'text/plain', None),
                ('logs/zuul-info/zuul-info.controller.txt',
                 'text/plain', None),
            ])

    def test_single_file(self):
        '''Test a single file'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR,
                                'logs/zuul-info/inventory.yaml'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('inventory.yaml', 'text/plain', None),
            ])

    def test_symlinks(self):
        '''Test symlinks'''
        with FileList() as fl:
            self.useFixture(SymlinkFixture())
            fl.add(os.path.join(FIXTURE_DIR, 'links/'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('controller', 'application/directory', None),
                ('good_symlink', 'application/directory', None),
                ('recursive_symlink', 'application/directory', None),
                ('symlink_loop', 'application/directory', None),
                ('symlink_loop_a', 'application/directory', None),
                ('job-output.json', 'application/json', None),
                ('symlink_file', 'text/plain', None),
                ('controller/service_log.txt', 'text/plain', None),
                ('symlink_loop/symlink_loop_b', 'application/directory', None),
                ('symlink_loop/placeholder', 'text/plain', None),
            ])

    def test_index_files(self):
        '''Test index generation'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs'))
            ix = Indexer(fl)
            ix.make_indexes()

            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('index.html', 'text/html', None),
                ('logs', 'application/directory', None),
                ('logs/controller', 'application/directory', None),
                ('logs/zuul-info', 'application/directory', None),
                ('logs/job-output.json', 'application/json', None),
                ('logs/index.html', 'text/html', None),
                ('logs/controller/subdir', 'application/directory', None),
                ('logs/controller/compressed.gz', 'text/plain', 'gzip'),
                ('logs/controller/cpu-load.svg', 'image/svg+xml', None),
                ('logs/controller/journal.xz', 'text/plain', 'xz'),
                ('logs/controller/service_log.txt', 'text/plain', None),
                ('logs/controller/syslog', 'text/plain', None),
                ('logs/controller/index.html', 'text/html', None),
                ('logs/controller/subdir/foo::3.txt', 'text/plain', None),
                ('logs/controller/subdir/subdir.txt', 'text/plain', None),
                ('logs/controller/subdir/index.html', 'text/html', None),
                ('logs/zuul-info/inventory.yaml', 'text/plain', None),
                ('logs/zuul-info/zuul-info.controller.txt',
                 'text/plain', None),
                ('logs/zuul-info/index.html', 'text/html', None),
            ])

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 1)
            self._assert_links(rows, [('logs/', 'logs/')])

            rows = self._index_rows(
                fl, 'logs/controller/subdir/index.html')
            self._assert_links(
                rows, [self.PARENT_LINK] + self.SUBDIR_FILE_LINKS)

    def test_index_files_trailing_slash(self):
        '''Test index generation with a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes()

            self.assert_files(fl, self.INDEXED_LOGS_DIR)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 3)
            self._assert_links(rows, [
                ('controller/', 'controller/'),
                ('zuul-info/', 'zuul-info/'),
            ])

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_links(
                rows, [self.PARENT_LINK] + self.SUBDIR_FILE_LINKS)

    def test_topdir_parent_link(self):
        '''Test index generation creates topdir parent link'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes(
                create_parent_links=True,
                create_topdir_parent_link=True)

            self.assert_files(fl, self.INDEXED_LOGS_DIR)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 4)
            self._assert_links(rows, [
                self.PARENT_LINK,
                ('controller/', 'controller/'),
                ('zuul-info/', 'zuul-info/'),
            ])

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_links(
                rows, [self.PARENT_LINK] + self.SUBDIR_FILE_LINKS)

    def test_no_parent_links(self):
        '''Test index generation without any parent links'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes(
                create_parent_links=False,
                create_topdir_parent_link=False)

            self.assert_files(fl, self.INDEXED_LOGS_DIR)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 3)
            self._assert_links(rows, [
                ('controller/', 'controller/'),
                ('zuul-info/', 'zuul-info/'),
            ])

            # With parent links disabled the files are the first rows.
            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_links(rows, self.SUBDIR_FILE_LINKS)
class TestFileDetail(testtools.TestCase):
    """Tests for the metadata FileDetail records about a path."""

    def test_get_file_detail(self):
        '''Test files info'''
        fixture = os.path.join(FIXTURE_DIR, 'logs/job-output.json')
        detail = FileDetail(fixture, '')

        # The modification time must match what os.stat() reports.
        expected_mtime = time.gmtime(os.stat(fixture)[stat.ST_MTIME])
        self.assertEqual(expected_mtime, detail.last_modified)
        self.assertEqual(16, detail.size)

    def test_get_file_detail_missing_file(self):
        '''Test files that go missing during a walk'''
        detail = FileDetail('missing/file/that/we/cant/find', '')

        # A path that cannot be stat'ed reports the epoch and zero size.
        self.assertEqual(time.gmtime(0), detail.last_modified)
        self.assertEqual(0, detail.size)

View File

@ -0,0 +1,860 @@
#!/usr/bin/env python3
#
# Copyright 2014 Rackspace Australia
# Copyright 2018-2019 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
"""
Utility to upload files to google
"""
import argparse
import datetime
import gzip
import io
import json
import logging
import mimetypes
import os
try:
import queue as queuelib
except ImportError:
import Queue as queuelib
import shutil
import stat
import sys
import tempfile
import threading
import time
try:
import urllib.parse as urlparse
except ImportError:
import urllib as urlparse
import zlib
import collections
from google.cloud import storage
import google.auth.compute_engine.credentials as gce_cred
from ansible.module_utils.basic import AnsibleModule
try:
# Python 3.3+
from collections.abc import Sequence
except ImportError:
from collections import Sequence
# Load the MIME type database and register '.yaml' as plain text so
# that mimetypes.guess_type() reports 'text/plain' for YAML files.
mimetypes.init()
mimetypes.add_type('text/plain', '.yaml')
# NOTE(review): presumably the cap on concurrent upload threads; it is
# consumed outside the code visible here -- confirm.
MAX_UPLOAD_THREADS = 24
# Total number of attempts retry_function() makes before re-raising.
POST_ATTEMPTS = 3
# Map mime types to apache icons
APACHE_MIME_ICON_MAP = {
'_default': 'unknown.png',
'application/gzip': 'compressed.png',
'application/directory': 'folder.png',
'text/html': 'text.png',
'text/plain': 'text.png',
}
# Map special file names to apache icons; get_mime_icon() consults this
# map before falling back to the mime type map above.
APACHE_FILE_ICON_MAP = {
'..': 'back.png',
}
# These icon files are from the Apache project and are in the public
# domain.
ICON_IMAGES = {
'back.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/'
'///M//+ZmZlmZmYzMzMAAACei5rnAAAAAnRSTlP/AOW3MEoAAABWSURB'
'VHjabdBBCgAhDEPRRpv7X3kwEMsQ//IRRC08urjRHbha5VLFUsVSxVI9'
'lmDh5hMpHD6n0EgoiZG0DNINpnWlcVXaRix76e1/8dddcL6nG0Ri9gHj'
'tgSXKYeLBgAAAABJRU5ErkJggg==',
'compressed.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAADAFBM'
'VEX//////8z//5n//2b//zP//wD/zP//zMz/zJn/zGb/zDP/zAD/'
'mf//mcz/mZn/mWb/mTP/mQD/Zv//Zsz/Zpn/Zmb/ZjP/ZgD/M///'
'M8z/M5n/M2b/MzP/MwD/AP//AMz/AJn/AGb/ADP/AADM///M/8zM'
'/5nM/2bM/zPM/wDMzP/MzMzMzJnMzGbMzDPMzADMmf/MmczMmZnM'
'mWbMmTPMmQDMZv/MZszMZpnMZmbMZjPMZgDMM//MM8zMM5nMM2bM'
'MzPMMwDMAP/MAMzMAJnMAGbMADPMAACZ//+Z/8yZ/5mZ/2aZ/zOZ'
'/wCZzP+ZzMyZzJmZzGaZzDOZzACZmf+ZmcyZmZmZmWaZmTOZmQCZ'
'Zv+ZZsyZZpmZZmaZZjOZZgCZM/+ZM8yZM5mZM2aZMzOZMwCZAP+Z'
'AMyZAJmZAGaZADOZAABm//9m/8xm/5lm/2Zm/zNm/wBmzP9mzMxm'
'zJlmzGZmzDNmzABmmf9mmcxmmZlmmWZmmTNmmQBmZv9mZsxmZplm'
'ZmZmZjNmZgBmM/9mM8xmM5lmM2ZmMzNmMwBmAP9mAMxmAJlmAGZm'
'ADNmAAAz//8z/8wz/5kz/2Yz/zMz/wAzzP8zzMwzzJkzzGYzzDMz'
'zAAzmf8zmcwzmZkzmWYzmTMzmQAzZv8zZswzZpkzZmYzZjMzZgAz'
'M/8zM8wzM5kzM2YzMzMzMwAzAP8zAMwzAJkzAGYzADMzAAAA//8A'
'/8wA/5kA/2YA/zMA/wAAzP8AzMwAzJkAzGYAzDMAzAAAmf8AmcwA'
'mZkAmWYAmTMAmQAAZv8AZswAZpkAZmYAZjMAZgAAM/8AM8wAM5kA'
'M2YAMzMAMwAAAP8AAMwAAJkAAGYAADPuAADdAAC7AACqAACIAAB3'
'AABVAABEAAAiAAARAAAA7gAA3QAAuwAAqgAAiAAAdwAAVQAARAAA'
'IgAAEQAAAO4AAN0AALsAAKoAAIgAAHcAAFUAAEQAACIAABHu7u7d'
'3d27u7uqqqqIiIh3d3dVVVVEREQiIiIREREAAAD7CIKZAAAAJXRS'
'TlP///////////////////////////////////////////////8A'
'P89CTwAAAGtJREFUeNp9z9ENgDAIhOEOco+dybVuEXasFMRDY/x5'
'+xJCO6Znu6kSx7BhXyjtKBWWNlwW88Loid7hFRKBXiIYCMfMEYUQ'
'QohC3CjFA5nIjqx1CqlDLGR/EhM5O06yvin0ftGOyIS7lV14AsQN'
'aR7rMEBYAAAAAElFTkSuQmCC',
'folder.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/'
'////zJnM//+ZZjMzMzMAAADCEvqoAAAAA3RSTlP//wDXyg1BAAAASElE'
'QVR42s3KQQ6AQAhDUaXt/a/sQDrRJu7c+NmQB0e99B3lnqjT6cYx6zSI'
'bV40n3D7psYMoBoz4w8/EdNYQsbGEjNxYSljXTEsA9O1pLTvAAAAAElF'
'TkSuQmCC',
'text.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAD1BMVEX/'
'///M//+ZmZkzMzMAAABVsTOVAAAAAnRSTlP/AOW3MEoAAABISURBVHja'
'tcrRCgAgCENRbf7/N7dKomGvngjhMsPLD4NdMPwia438NRIyxsaL/XQZ'
'hyxpkC6zyjLXGVXnkhqWJWIIrOgeinECLlUCjBCqNQoAAAAASUVORK5C'
'YII=',
'unknown.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAD1BMVEX/'
'///M//+ZmZkzMzMAAABVsTOVAAAAAnRSTlP/AOW3MEoAAABYSURBVHja'
'ncvRDoAgDEPRruX/v1kmNHPBxMTLyzgD6FmsILg56g2hQnJkOco4yZhq'
'tN5nYd5Zq0LsHblwxwP9GTCWsaGtoelANKzOlz/RfaLYUmLE6E28ALlN'
'AupSdoFsAAAAAElFTkSuQmCC'}
# Begin vendored code
# This code is licensed under the Public Domain/CC0 and comes from
# https://github.com/leenr/gzip-stream/blob/master/gzip_stream.py
# Code was modified:
# removed type annotations to support python2.
# removed use of *, somearg for positional anonymous args.
# Default compression level to 9.
class GZIPCompressedStream(io.RawIOBase):
    """Readable raw stream producing the gzip-compressed form of a stream.

    Data is pulled from the wrapped ``stream`` on demand, compressed, and
    handed out through the normal read API, so the complete compressed
    payload is never held in memory at once.

    Args:
        stream: A binary file-like object providing ``read(size)``.
        compression_level (int): gzip compression level, 1 to 9.
    """

    def __init__(self, stream, compression_level=9):
        assert 1 <= compression_level <= 9

        self._compression_level = compression_level
        self._stream = stream

        self._compressed_stream = io.BytesIO()
        self._compressor = gzip.GzipFile(
            mode='wb',
            fileobj=self._compressed_stream,
            compresslevel=compression_level
        )

        # because of the GZIP header written by `GzipFile.__init__`:
        self._compressed_stream.seek(0)
        # Number of compressed bytes handed out so far, see tell().
        self.count = 0

    def read(self, length=-1):
        """Read up to ``length`` compressed bytes (everything if negative).

        The explicit two-argument super() keeps this usable on Python 2,
        which the rest of the file still supports.  Byte counting is done
        in readinto(): RawIOBase.read() ends up there for every size, and
        a negative size goes through readall(), which calls read() again.
        Counting here as well would count those bytes twice.
        """
        return super(GZIPCompressedStream, self).read(length)

    def tell(self):
        """Return the number of compressed bytes read so far."""
        return self.count

    @property
    def compression_level(self):
        return self._compression_level

    @property
    def stream(self):
        return self._stream

    def readable(self):
        return True

    def _read_compressed_into(self, b):
        """Copy pending compressed bytes into ``b``; return the count."""
        buf = self._compressed_stream.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def readinto(self, b):
        """Fill ``b`` with compressed bytes; return how many were written.

        Fewer bytes than ``len(b)`` are returned only once the source
        stream is exhausted and the compressor has been closed.
        """
        b = memoryview(b)

        offset = 0
        size = len(b)
        while offset < size:
            offset += self._read_compressed_into(b[offset:])
            if offset < size:
                # self._compressed_buffer now empty
                if self._compressor.closed:
                    # nothing to compress anymore
                    break
                # compress next bytes
                self._read_n_compress(size)

        # Single place where consumed bytes are accounted for tell().
        self.count += offset
        return offset

    def _read_n_compress(self, size):
        """Read up to ``size`` source bytes and compress them."""
        assert size > 0

        data = self._stream.read(size)

        # rewind buffer to the start to free up memory
        # (because anything currently in the buffer should be already
        #  streamed off the object)
        self._compressed_stream.seek(0)
        self._compressed_stream.truncate(0)

        if data:
            self._compressor.write(data)
        else:
            # this will write final data (will flush zlib with Z_FINISH)
            self._compressor.close()

        # rewind to the buffer start
        self._compressed_stream.seek(0)

    def __repr__(self):
        return (
            '{self.__class__.__name__}('
            '{self.stream!r}, '
            'compression_level={self.compression_level!r}'
            ')'
        ).format(self=self)
# End vendored code
def get_mime_icon(mime, filename=''):
    """Return a ``data:`` URI holding the icon for a file.

    The file name is looked up first, then the mime type, and finally
    the ``'_default'`` icon is used.

    Args:
        mime (str): Mime type of the entry.
        filename (str): File name of the entry.
    """
    icon_name = APACHE_FILE_ICON_MAP.get(filename)
    if not icon_name:
        icon_name = APACHE_MIME_ICON_MAP.get(mime)
    if not icon_name:
        icon_name = APACHE_MIME_ICON_MAP['_default']
    return "data:image/png;base64,%s" % ICON_IMAGES[icon_name]
def retry_function(func):
    """Call ``func`` and return its result, retrying on any Exception.

    At most POST_ATTEMPTS calls are made.  A failed attempt is logged
    and followed by a sleep of ``attempt * 10`` seconds; the exception
    raised by the final attempt propagates to the caller.
    """
    attempt = 1
    while attempt <= POST_ATTEMPTS:
        try:
            return func()
        except Exception:
            if attempt >= POST_ATTEMPTS:
                # Out of attempts: let the caller see the failure.
                raise
            logging.exception("Error on attempt %d" % attempt)
            time.sleep(attempt * 10)
        attempt += 1
def sizeof_fmt(num, suffix='B'):
    """Format a byte count using binary (1024-based) unit prefixes.

    Args:
        num: The size to format.
        suffix (str): Text appended after the unit prefix.

    Returns:
        str: The value with one decimal place, the unit prefix and the
        suffix, for example ``'1.5KB'``.
    """
    # From http://stackoverflow.com/questions/1094841/
    # reusable-library-to-get-human-readable-version-of-file-size
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    position = 0
    while position < len(prefixes) and not (abs(num) < 1024.0):
        num /= 1024.0
        position += 1
    if position == len(prefixes):
        # Larger than every listed prefix.
        return "%.1f%s%s" % (num, 'Y', suffix)
    return "%3.1f%s%s" % (num, prefixes[position], suffix)
class Credentials(gce_cred.Credentials):
    """Credentials whose access token is read from a JSON file.

    The file must contain the keys ``access_token`` and ``expires_in``
    (seconds).

    NOTE(review): the base class initializer is not invoked here --
    confirm this is intended.

    Args:
        path (str): Path of the JSON token file.
    """

    def __init__(self, path):
        self._path = path
        # Load the token right away; the request argument is not used.
        self.refresh(None)

    def refresh(self, request):
        """Re-read the token file and update ``token`` and ``expiry``."""
        with open(self._path) as token_file:
            payload = json.load(token_file)
        self.token = payload['access_token']
        lifetime = datetime.timedelta(seconds=payload['expires_in'])
        self.expiry = datetime.datetime.utcnow() + lifetime
class FileDetail():
    """
    Used to generate indexes with links or as the file path
    to push to storage.
    """

    def __init__(self, full_path, relative_path, filename=None):
        """
        Args:
            full_path (str): The absolute path to the file on disk.
            relative_path (str): The relative path from the artifacts source
                                 used for links.
            filename (str): An optional alternate filename in links.
        """
        self.full_path = full_path
        self.relative_path = relative_path
        self.filename = (os.path.basename(full_path)
                         if filename is None else filename)

        # Anything that is not an existing regular file is treated as a
        # folder, including missing paths and a full_path of None.
        if self.full_path and os.path.isfile(self.full_path):
            guessed_type, guessed_encoding = mimetypes.guess_type(
                self.full_path)
            self.mimetype = guessed_type or 'text/plain'
            self.encoding = guessed_encoding
            self.folder = False
        else:
            self.mimetype = 'application/directory'
            self.encoding = None
            self.folder = True

        # OSError covers a path that cannot be stat'ed on Python 2 and 3
        # (FileNotFoundError is one of its subclasses on Python 3);
        # TypeError covers a full_path of None.
        try:
            info = os.stat(self.full_path)
            self.last_modified = time.gmtime(info[stat.ST_MTIME])
            self.size = info[stat.ST_SIZE]
        except (OSError, TypeError):
            self.last_modified = time.gmtime(0)
            self.size = 0

    def __repr__(self):
        kind = 'Folder' if self.folder else 'File'
        return '<%s %s>' % (kind, self.relative_path)
class FileList(Sequence):
    '''A collection of FileDetail objects

    This is a list-like group of FileDetail objects, intended to be
    used as a context manager around the upload process.  The first
    entry is always a pseudo top-level directory with no path on disk.
    Temporary directories handed out by get_tempdir() are removed when
    the context exits.
    '''

    def __init__(self):
        self.file_list = []
        # Pseudo top-level directory that the added paths hang off.
        self.file_list.append(FileDetail(None, '', ''))
        self.tempdirs = []

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        for tempdir in self.tempdirs:
            shutil.rmtree(tempdir)

    def __getitem__(self, item):
        return self.file_list.__getitem__(item)

    def __len__(self):
        return self.file_list.__len__()

    def get_tempdir(self):
        '''Get a temporary directory

        Returns path to a private temporary directory which will be
        cleaned on exit
        '''
        tempdir = tempfile.mkdtemp(prefix='s-u-l-tmp')
        self.tempdirs.append(tempdir)
        return tempdir

    @staticmethod
    def _path_in_tree(root, path):
        '''Tell whether ``path`` resolves to ``root`` or something below it

        Args:
            root (str): The resolved (real, absolute) root directory.
            path (str): The path to check; symlinks are resolved first.

        Returns:
            bool: True when the resolved path is inside the tree.
        '''
        full_path = os.path.realpath(os.path.abspath(
            os.path.expanduser(path)))
        # Compare against the root followed by a path separator.  A bare
        # startswith(root) would also accept siblings that only share a
        # name prefix, e.g. "/srv/logs-private" for a root of "/srv/logs".
        root_prefix = os.path.join(root, '')
        if full_path != root and not full_path.startswith(root_prefix):
            logging.debug("Skipping path outside root: %s" % (path,))
            return False
        return True

    def add(self, file_path):
        """
        Generate a list of files to upload to storage. Recurses through
        directories

        A directory given with a trailing slash contributes only its
        contents; without the slash the directory itself is listed as
        well.  Entries that resolve outside the directory are skipped.
        A path that is neither a file nor a directory adds nothing.

        Args:
            file_path (str): The file or directory to add.
        """

        # file_list: A list of FileDetails to push to storage
        file_list = []

        if os.path.isfile(file_path):
            relative_path = os.path.basename(file_path)
            file_list.append(FileDetail(file_path, relative_path))
        elif os.path.isdir(file_path):
            original_root = os.path.realpath(os.path.abspath(
                os.path.expanduser(file_path)))

            parent_dir = os.path.dirname(file_path)
            if not file_path.endswith('/'):
                filename = os.path.basename(file_path)
                full_path = file_path
                relative_name = os.path.relpath(full_path, parent_dir)
                file_list.append(FileDetail(full_path, relative_name,
                                            filename))
            # TODO: this will copy the result of symlinked files, but
            # it won't follow directory symlinks.  If we add that, we
            # should ensure that we don't loop.
            for path, folders, files in os.walk(file_path):
                # Sort folder in-place so that we recurse in order.
                files.sort(key=lambda x: x.lower())
                folders.sort(key=lambda x: x.lower())

                for filename in folders:
                    full_path = os.path.join(path, filename)
                    if not self._path_in_tree(original_root, full_path):
                        continue
                    relative_name = os.path.relpath(full_path, parent_dir)
                    file_list.append(FileDetail(full_path, relative_name,
                                                filename))

                for filename in files:
                    full_path = os.path.join(path, filename)
                    if not self._path_in_tree(original_root, full_path):
                        continue
                    relative_name = os.path.relpath(full_path, parent_dir)
                    file_detail = FileDetail(full_path, relative_name)
                    file_list.append(file_detail)

        self.file_list += file_list
class Indexer():
"""Index a FileList
Functions to generate indexes and other collated data for a
FileList
- make_indexes() : make index.html in folders
"""
def __init__(self, file_list):
'''
Args:
file_list (FileList): A FileList object with all files
to be indexed.
'''
assert isinstance(file_list, FileList)
self.file_list = file_list
def _make_index_file(self, folder_links, title, tempdir, append_footer):
"""Writes an index into a file for pushing"""
for file_details in folder_links:
# Do not generate an index file if one exists already.
# This may be the case when uploading other machine generated
# content like python coverage info.
if self.index_filename == file_details.filename:
return
index_content = self._generate_log_index(
folder_links, title, append_footer)
fd = open(os.path.join(tempdir, self.index_filename), 'w')
fd.write(index_content)
return os.path.join(tempdir, self.index_filename)
def _generate_log_index(self, folder_links, title, append_footer):
"""Create an index of logfiles and links to them"""
output = '<html><head><title>%s</title></head><body>\n' % title
output += '<h1>%s</h1>\n' % title
output += '<table><tr><th></th><th>Name</th><th>Last Modified</th>'
output += '<th>Size</th></tr>'
file_details_to_append = None
for file_details in folder_links:
output += '<tr>'
output += (
'<td><img alt="[ ]" title="%(m)s" src="%(i)s"></img></td>' % ({
'm': file_details.mimetype,
'i': get_mime_icon(file_details.mimetype,
file_details.filename),
}))
filename = file_details.filename
if file_details.folder:
filename += '/'
output += '<td><a href="%s">%s</a></td>' % (
urlparse.quote(filename),
filename)
output += '<td>%s</td>' % time.asctime(
file_details.last_modified)
size = sizeof_fmt(file_details.size, suffix='')
output += '<td style="text-align: right">%s</td>' % size
output += '</tr>\n'
if (append_footer and
append_footer in file_details.filename):
file_details_to_append = file_details
output += '</table>'
if file_details_to_append:
output += '<br /><hr />'
try:
with open(file_details_to_append.full_path, 'r') as f:
output += f.read()
except IOError:
logging.exception("Error opening file for appending")
output += '</body></html>\n'
return output
def make_indexes(self, create_parent_links=True,
                 create_topdir_parent_link=False,
                 append_footer='index_footer.html'):
    '''Generate an index.html for every folder in the file list.

    Args:
        create_parent_links (bool): Add a ".." entry to the index of
            folders other than the pseudo-top-directory.
        create_topdir_parent_link (bool): Add a ".." entry to the index
            of the pseudo-top-directory.
        append_footer (str): Filename of a footer to append to each
            generated page.

    Return:
        No value; ``self.file_list.file_list`` is replaced by a new list
        that also contains the generated index files.
    '''
    self.index_filename = 'index.html'

    # Group every entry under the folder that contains it. A folder entry
    # also opens its own (initially empty) group.
    contents_by_dir = collections.OrderedDict()
    for entry in self.file_list:
        if not entry.folder:
            parent = os.path.dirname(entry.relative_path)
        else:
            contents_by_dir[entry.relative_path] = []
            parent = os.path.dirname(
                os.path.dirname(entry.relative_path + '/'))
            if parent == '/':
                parent = ''
        contents_by_dir[parent].append(entry)

    parent_link = FileDetail(None, '..', '..')
    index_by_dir = {}
    for dirname, entries in contents_by_dir.items():
        leads_with_pseudo_topdir = (
            bool(entries) and entries[0].full_path is None)
        if leads_with_pseudo_topdir:
            # Don't add the pseudo-top-directory
            entries = entries[1:]
            if create_topdir_parent_link:
                entries = [parent_link] + entries
        elif create_parent_links:
            # Do generate a link to the parent directory
            entries = [parent_link] + entries

        index_path = self._make_index_file(
            entries, 'Index of %s' % (dirname,),
            self.file_list.get_tempdir(), append_footer)
        if not index_path:
            continue
        index_name = os.path.join(dirname, os.path.basename(index_path))
        index_by_dir[dirname] = FileDetail(index_path, index_name)

    # Walk the list backwards so that, once reversed again, each index
    # file lands at the end of the group of files for its directory.
    reordered = []
    previous_dir = None
    for entry in reversed(list(self.file_list)):
        path = entry.relative_path
        if entry.folder:
            path = path + '/'
        current_dir = os.path.dirname(path)
        if current_dir == '/':
            current_dir = ''
        if current_dir != previous_dir:
            index_entry = index_by_dir.pop(current_dir, None)
            if index_entry:
                reordered.append(index_entry)
                previous_dir = current_dir
        reordered.append(entry)
    reordered.reverse()
    self.file_list.file_list = reordered
class GzipFilter():
    """Iterator that yields chunks read from a ``GZIPCompressedStream``.

    The stream wraps ``infile``. The final (empty) read is still yielded;
    the stream is closed on the call after that, just before
    ``StopIteration`` is raised.
    """
    # Number of bytes requested from the stream per iteration.
    chunk_size = 16384

    def __init__(self, infile):
        self.gzipfile = GZIPCompressedStream(infile)
        self.done = False

    def __iter__(self):
        return self

    def __next__(self):
        if not self.done:
            chunk = self.gzipfile.read(self.chunk_size)
            # An empty read marks exhaustion for the next call.
            self.done = not chunk
            return chunk
        self.gzipfile.close()
        raise StopIteration()
class DeflateFilter():
    """Iterator that yields zlib-compressed chunks of ``infile``.

    Input is read ``chunk_size`` bytes at a time. Each iteration returns
    the next non-empty piece of compressor output; the last iteration
    returns whatever the compressor flushes.
    """
    # Number of uncompressed bytes read from ``infile`` per read call.
    chunk_size = 16384

    def __init__(self, infile):
        self.infile = infile
        self.encoder = zlib.compressobj()
        self.done = False

    def __iter__(self):
        return self

    def __next__(self):
        if self.done:
            raise StopIteration()
        # The compressor may buffer input and emit nothing, so keep
        # feeding it until it produces output or the input runs out.
        raw = self.infile.read(self.chunk_size)
        while raw:
            compressed = self.encoder.compress(raw)
            if compressed:
                return compressed
            raw = self.infile.read(self.chunk_size)
        self.done = True
        return self.encoder.flush()
class Uploader():
    """Upload a list of files to one Google Cloud Storage bucket.

    Args:
        client: Storage client object; only its ``bucket()`` method is
            used here. Not used when ``dry_run`` is true.
        container (str): Name of the bucket to upload into.
        prefix (str): Path prepended to every object name.
        dry_run (bool): When true, nothing is contacted or uploaded and
            ``url`` is a fixed placeholder.
    """

    def __init__(self, client, container, prefix=None,
                 dry_run=False):
        self.dry_run = dry_run
        # Set unconditionally so a dry-run instance has the same
        # attributes as a real one.
        self.client = client
        self.prefix = prefix or ''
        if dry_run:
            self.bucket = None
            self.url = 'http://dry-run-url.com/a/path/'
            return

        self.bucket = client.bucket(container)
        # Allow cross-origin reads and serve index.html as the bucket's
        # main page suffix.
        cors = [{
            'method': ['GET', 'HEAD'],
            'origin': ['*']
        }]
        self.bucket.cors = cors
        self.bucket.website = {"mainPageSuffix": "index.html"}
        self.bucket.update()

        self.url = os.path.join('https://storage.googleapis.com/',
                                container, self.prefix)

    def upload(self, file_list):
        """Spin up thread pool to upload to storage.

        Starts at most ``MAX_UPLOAD_THREADS`` worker threads and waits for
        all of them. Does nothing in dry-run mode.
        """
        if self.dry_run:
            return

        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
        threads = []
        queue = queuelib.Queue()
        # add items to queue
        for f in file_list:
            queue.put(f)

        for x in range(num_threads):
            t = threading.Thread(target=self.post_thread, args=(queue,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

    def post_thread(self, queue):
        """Worker loop: upload queued files until the queue is empty."""
        while True:
            try:
                file_detail = queue.get_nowait()
                logging.debug("%s: processing job %s",
                              threading.current_thread(),
                              file_detail)
                retry_function(lambda: self._post_file(file_detail))
            except IOError:
                # Do our best to attempt to upload all the files
                logging.exception("Error opening file")
                continue
            except queuelib.Empty:
                # No more work to do
                return

    @staticmethod
    def _is_text_type(mimetype):
        """Return True for mimetypes that should be gzip-compressed."""
        # We want to compress all text types.
        if mimetype.startswith('text/'):
            return True

        # Further compress types that typically contain text but are no
        # text sub type.
        compress_types = [
            'application/json',
            'image/svg+xml',
        ]
        if mimetype in compress_types:
            return True
        return False

    def _post_file(self, file_detail):
        """Upload one file or folder placeholder to the bucket.

        Files are streamed from ``file_detail.full_path``; folders are
        stored as an empty object. The source file handle is always closed
        when the upload finishes or fails.
        """
        relative_path = os.path.join(self.prefix, file_detail.relative_path)
        content_encoding = None
        source = None

        try:
            if not file_detail.folder:
                source = open(file_detail.full_path, 'rb')
                if (file_detail.encoding is None and
                        self._is_text_type(file_detail.mimetype)):
                    content_encoding = 'gzip'
                    data = GZIPCompressedStream(source)
                else:
                    if (not file_detail.filename.endswith(".gz") and
                            file_detail.encoding):
                        # Don't apply gzip encoding to files that we
                        # receive as already gzipped. The reason for this
                        # is storage will serve this back to users as an
                        # uncompressed file if they don't set an
                        # accept-encoding that includes gzip. This can
                        # cause problems when the desired file state is
                        # compressed as with .tar.gz tarballs.
                        content_encoding = file_detail.encoding
                    data = source
            else:
                data = ''
                relative_path = relative_path.rstrip('/')
                if relative_path == '':
                    relative_path = '/'

            blob = self.bucket.blob(relative_path)
            if content_encoding:
                blob.content_encoding = content_encoding
            # File-like payloads are streamed; the folder placeholder is
            # a plain string.
            if hasattr(data, 'tell'):
                upload = blob.upload_from_file
            else:
                upload = blob.upload_from_string
            upload(data, content_type=file_detail.mimetype)
        finally:
            # Release the handle on success and failure alike, so retries
            # and large file lists do not accumulate open descriptors.
            if source is not None:
                source.close()
def run(container, files,
        indexes=True, parent_links=True, topdir_parent_link=False,
        partition=False, footer='index_footer.html',
        prefix=None, dry_run=False, credentials_file=None):
    """Scan ``files``, optionally build indexes, and upload everything.

    Args:
        container (str): Bucket name, or bucket-name prefix when
            partitioning applies.
        files (list): Paths to add to the file list.
        indexes (bool): Generate index.html files before uploading.
        parent_links (bool): Passed to ``Indexer.make_indexes`` as
            ``create_parent_links``.
        topdir_parent_link (bool): Passed to ``Indexer.make_indexes`` as
            ``create_topdir_parent_link``.
        partition (bool): When true and ``prefix`` has more than one path
            component, the first component is removed from the prefix and
            appended to ``container`` after an underscore.
        footer (str): Passed to ``Indexer.make_indexes`` as
            ``append_footer``.
        prefix (str): Path prepended to object names; leading slashes are
            stripped.
        dry_run (bool): Prepare everything but do not contact the storage
            service.
        credentials_file (str): Path handed to ``Credentials``; when not
            given the storage client is built without explicit credentials.

    Returns:
        The ``url`` attribute of the uploader.
    """
    if dry_run:
        # A dry run never talks to the storage service, so it must not
        # require credentials to be available just to build a client.
        client = None
    elif credentials_file:
        cred = Credentials(credentials_file)
        client = storage.Client(credentials=cred)
    else:
        client = storage.Client()

    if prefix:
        prefix = prefix.lstrip('/')
    if partition and prefix:
        parts = prefix.split('/')
        if len(parts) > 1:
            container += '_' + parts[0]
            prefix = '/'.join(parts[1:])

    # Create the objects to make sure the arguments are sound.
    with FileList() as file_list:
        # Scan the files.
        for file_path in files:
            file_list.add(file_path)

        indexer = Indexer(file_list)

        # (Possibly) make indexes.
        if indexes:
            indexer.make_indexes(create_parent_links=parent_links,
                                 create_topdir_parent_link=topdir_parent_link,
                                 append_footer=footer)

        logging.debug("List of files prepared to upload:")
        for x in file_list:
            logging.debug(x)

        # Upload.
        uploader = Uploader(client, container, prefix, dry_run)
        uploader.upload(file_list)
        return uploader.url
def ansible_main():
    """Entry point when the file is executed as an Ansible module.

    Declares the module arguments, forwards them to ``run`` and reports
    the resulting URL through ``exit_json``.
    """
    spec = {
        'container': {'required': True, 'type': 'str'},
        'files': {'required': True, 'type': 'list'},
        'partition': {'type': 'bool', 'default': False},
        'indexes': {'type': 'bool', 'default': True},
        'parent_links': {'type': 'bool', 'default': True},
        'topdir_parent_link': {'type': 'bool', 'default': False},
        'footer': {'type': 'str'},
        'prefix': {'type': 'str'},
        'credentials_file': {'type': 'str'},
    }
    module = AnsibleModule(argument_spec=spec)
    params = module.params

    # Everything except the two positional arguments is passed by keyword
    # under the same name as the module parameter.
    keyword_names = (
        'indexes',
        'parent_links',
        'topdir_parent_link',
        'partition',
        'footer',
        'prefix',
        'credentials_file',
    )
    keywords = {name: params.get(name) for name in keyword_names}
    url = run(params.get('container'), params.get('files'), **keywords)

    module.exit_json(changed=True,
                     url=url)
def cli_main():
    """Entry point when the file is run from an interactive terminal.

    Parses the command line, forwards the options to ``run`` and prints
    the returned URL.
    """
    parser = argparse.ArgumentParser(
        description="Upload files to Google Cloud Storage"
    )
    parser.add_argument('--verbose', action='store_true',
                        help='show debug information')
    parser.add_argument('--no-indexes', action='store_true',
                        help='do not generate any indexes at all')
    parser.add_argument('--no-parent-links', action='store_true',
                        help='do not include links back to a parent dir')
    parser.add_argument('--create-topdir-parent-link', action='store_true',
                        help='include a link in the root directory of the '
                             'files to the parent directory which may be the '
                             'index of all results')
    parser.add_argument('--partition', action='store_true',
                        help='partition the prefix into multiple containers')
    parser.add_argument('--append-footer', default='index_footer.html',
                        help='when generating an index, if the given file is '
                             'present in a directory, append it to the index '
                             '(set to "none" to disable)')
    parser.add_argument('--prefix',
                        help='Prepend this path to the object names when '
                             'uploading')
    parser.add_argument('--dry-run', action='store_true',
                        help='do not attempt to create containers or upload, '
                             'useful with --verbose for debugging')
    # Every other option is hyphenated; accept that spelling here too and
    # keep the original underscore form as an alias for existing callers.
    # Both store into ``args.credentials_file``.
    parser.add_argument('--credentials-file', '--credentials_file',
                        dest='credentials_file',
                        help='A file with Google cloud credentials')
    parser.add_argument('container',
                        help='Name of the container to use when uploading')
    parser.add_argument('files', nargs='+',
                        help='the file(s) to upload with recursive glob '
                             'matching when supplied as a string')

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.captureWarnings(True)

    # The literal word "none" (any case) disables the footer.
    append_footer = args.append_footer
    if append_footer.lower() == 'none':
        append_footer = None

    url = run(args.container, args.files,
              indexes=not args.no_indexes,
              parent_links=not args.no_parent_links,
              topdir_parent_link=args.create_topdir_parent_link,
              partition=args.partition,
              footer=append_footer,
              prefix=args.prefix,
              dry_run=args.dry_run,
              credentials_file=args.credentials_file)
    print(url)
if __name__ == '__main__':
    # A terminal on stdin means a person started the script, so use the
    # command line interface; otherwise run as an Ansible module.
    entry_point = cli_main if sys.stdin.isatty() else ansible_main
    entry_point()

View File

@ -0,0 +1,37 @@
# Only compute the log path when an earlier role has not already set it.
- name: Set zuul-log-path fact
  when: zuul_log_path is not defined
  include_role:
    name: set-zuul-log-path-fact

# Always upload (true), never upload (false) or only on failure ('failure')
- when: >-
    zuul_site_upload_logs | default(true) | bool or
    (zuul_site_upload_logs == 'failure' and not zuul_success | bool)
  block:
    # Use chmod instead of file because ansible 2.5 file with recurse and
    # follow can't really handle symlinks to .
    - name: Ensure logs are readable before uploading
      # ANSIBLE0007 chmod used in place of argument mode to file
      tags:
        - skip_ansible_lint
      delegate_to: localhost
      command: >-
        chmod -R u=rwX,g=rX,o=rX {{ zuul.executor.log_root }}/

    - name: Upload logs to Google Cloud Storage
      delegate_to: localhost
      register: upload_results
      zuul_google_storage_upload:
        container: "{{ zuul_log_container }}"
        prefix: "{{ zuul_log_path }}"
        partition: "{{ zuul_log_partition }}"
        indexes: "{{ zuul_log_create_indexes }}"
        credentials_file: "{{ zuul_log_credentials_file }}"
        files:
          - "{{ zuul.executor.log_root }}/"

    # Hand the uploaded location back to Zuul as the build's log URL.
    - name: Return log URL to Zuul
      when: upload_results is defined
      delegate_to: localhost
      zuul_return:
        data:
          zuul:
            log_url: "{{ upload_results.url }}/"

View File

@ -23,3 +23,5 @@ openstacksdk>=0.17.1
requests
requestsexceptions
bs4
# For upload-logs-gcs
google-cloud-storage