zuul-jobs/roles/upload-logs-gcs/library/test_zuul_google_storage_upload.py
James E. Blair d34881b45a Google logs: Link to index.html files
When generating index.html files for uploading to Google Cloud
Storage, link directories to their index.html files because
(except in certain circumstances) Google does not automatically
serve index.html files at directory URLs.

Change-Id: Ie854de328057ed080c80df9328163da98014ea36
2020-02-05 15:21:37 +00:00

414 lines
18 KiB
Python

# Copyright (C) 2018-2019 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
#
# See the License for the specific language governing permissions and
# limitations under the License.
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import os
import testtools
import time
import stat
import fixtures
from bs4 import BeautifulSoup
from .zuul_google_storage_upload import FileList, Indexer, FileDetail
# Root of the on-disk fixture trees (e.g. ``logs`` and ``links``) used by
# the tests below; it sits in the same directory as this module.
_THIS_DIR = os.path.dirname(__file__)
FIXTURE_DIR = os.path.join(_THIS_DIR, 'test-fixtures')
class SymlinkFixture(fixtures.Fixture):
    '''Create the symlinks used by the ``links`` fixture tree.

    Every ``(name, target)`` pair in ``links`` is created as a symlink
    under ``FIXTURE_DIR/links`` during set-up and unlinked again during
    clean-up.
    '''

    # (link name relative to FIXTURE_DIR/links, symlink target) pairs.
    links = [
        ('bad_symlink', '/etc'),
        ('bad_symlink_file', '/etc/issue'),
        ('good_symlink', 'controller'),
        ('recursive_symlink', '.'),
        ('symlink_file', 'job-output.json'),
        ('symlink_loop_a', 'symlink_loop'),
        ('symlink_loop/symlink_loop_b', '..'),
    ]

    def _setUp(self):
        '''Create all symlinks, removing stale ones first.'''
        # Clear leftovers from an earlier run so os.symlink() does not
        # fail with "file exists".
        self._cleanup()
        # Register the cleanup *before* creating anything, so that links
        # already made are still removed if a later os.symlink() raises.
        self.addCleanup(self._cleanup)
        for (src, target) in self.links:
            path = os.path.join(FIXTURE_DIR, 'links', src)
            os.symlink(target, path)

    def _cleanup(self):
        '''Remove every symlink of this fixture that is present on disk.'''
        for (src, _target) in self.links:
            path = os.path.join(FIXTURE_DIR, 'links', src)
            # lexists() is also true for dangling links (for instance
            # 'bad_symlink_file' on a host without /etc/issue), which
            # exists() would report as missing and therefore leave behind.
            if os.path.lexists(path):
                os.unlink(path)
class TestFileList(testtools.TestCase):
    '''Tests for FileList traversal and Indexer index.html generation.'''

    # Expected file list for the 'logs/' fixture (added with a trailing
    # slash) once indexes have been generated.  Shared by the tests that
    # only differ in the parent-link options passed to make_indexes().
    _INDEXED_LOGS_CONTENTS = [
        ('', 'application/directory', None),
        ('controller', 'application/directory', None),
        ('zuul-info', 'application/directory', None),
        ('job-output.json', 'application/json', None),
        ('index.html', 'text/html', None),
        ('controller/subdir', 'application/directory', None),
        ('controller/compressed.gz', 'text/plain', 'gzip'),
        ('controller/cpu-load.svg', 'image/svg+xml', None),
        ('controller/journal.xz', 'text/plain', 'xz'),
        ('controller/service_log.txt', 'text/plain', None),
        ('controller/syslog', 'text/plain', None),
        ('controller/index.html', 'text/html', None),
        ('controller/subdir/foo::3.txt', 'text/plain', None),
        ('controller/subdir/subdir.txt', 'text/plain', None),
        ('controller/subdir/index.html', 'text/html', None),
        ('zuul-info/inventory.yaml', 'text/plain', None),
        ('zuul-info/zuul-info.controller.txt', 'text/plain', None),
        ('zuul-info/index.html', 'text/html', None),
    ]

    def assert_files(self, result, files):
        '''Assert that result matches the expected entries, in order.

        :param result: the file list under test; each entry exposes
            relative_path, filename, folder, full_path, mimetype and
            encoding.
        :param files: expected entries as
            (relative_path, mimetype, encoding) tuples.
        '''
        self.assertEqual(len(result), len(files))
        for expected, received in zip(files, result):
            rel_path = expected[0]
            self.assertEqual(rel_path, received.relative_path)
            # Derive the expected base name; an expected path ending in
            # '/' keeps that slash on the file name.
            if rel_path and rel_path[-1] == '/':
                efilename = os.path.split(
                    os.path.dirname(rel_path))[1] + '/'
            else:
                efilename = os.path.split(rel_path)[1]
            self.assertEqual(efilename, received.filename)
            if received.folder:
                # The root entry ('') and entries without a full_path
                # are not checked against the filesystem.
                if received.full_path is not None and rel_path != '':
                    self.assertTrue(os.path.isdir(received.full_path))
            else:
                self.assertTrue(os.path.isfile(received.full_path))
            self.assertEqual(expected[1], received.mimetype)
            self.assertEqual(expected[2], received.encoding)

    def find_file(self, file_list, path):
        '''Return the entry whose relative_path equals path, else None.'''
        for f in file_list:
            if f.relative_path == path:
                return f
        return None

    def _index_rows(self, file_list, path):
        '''Parse the index page at path and return its table rows.

        The first <tr> (presumably the table header) is skipped.
        '''
        index = self.find_file(file_list, path)
        # Fail with a readable message instead of an AttributeError on
        # None when the index was not generated.
        self.assertIsNotNone(index, '%s not found in file list' % path)
        # Read inside a context manager so the handle is closed at once.
        with open(index.full_path) as f:
            page = BeautifulSoup(f.read(), 'html.parser')
        return page.find_all('tr')[1:]

    def _assert_link(self, row, href, text):
        '''Assert the first <a> of row has the given href and text.'''
        self.assertEqual(row.find('a').get('href'), href)
        self.assertEqual(row.find('a').text, text)

    def _assert_subdir_rows(self, rows, parent_link=True):
        '''Check the rows of the controller/subdir index page.

        With parent_link the first row must be the '../' link; the two
        file rows are expected right after it.
        '''
        rows = list(rows)
        if parent_link:
            self._assert_link(rows.pop(0), '../index.html', '../')
        # Test proper escaping of files with funny names
        self._assert_link(rows[0], 'foo%3A%3A3.txt', 'foo::3.txt')
        # Test files without escaping
        self._assert_link(rows[1], 'subdir.txt', 'subdir.txt')

    def test_single_dir_trailing_slash(self):
        '''Test a single directory with a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('controller', 'application/directory', None),
                ('zuul-info', 'application/directory', None),
                ('job-output.json', 'application/json', None),
                ('controller/subdir', 'application/directory', None),
                ('controller/compressed.gz', 'text/plain', 'gzip'),
                ('controller/cpu-load.svg', 'image/svg+xml', None),
                ('controller/journal.xz', 'text/plain', 'xz'),
                ('controller/service_log.txt', 'text/plain', None),
                ('controller/syslog', 'text/plain', None),
                ('controller/subdir/foo::3.txt', 'text/plain', None),
                ('controller/subdir/subdir.txt', 'text/plain', None),
                ('zuul-info/inventory.yaml', 'text/plain', None),
                ('zuul-info/zuul-info.controller.txt', 'text/plain', None),
            ])

    def test_single_dir(self):
        '''Test a single directory without a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('logs', 'application/directory', None),
                ('logs/controller', 'application/directory', None),
                ('logs/zuul-info', 'application/directory', None),
                ('logs/job-output.json', 'application/json', None),
                ('logs/controller/subdir', 'application/directory', None),
                ('logs/controller/compressed.gz', 'text/plain', 'gzip'),
                ('logs/controller/cpu-load.svg', 'image/svg+xml', None),
                ('logs/controller/journal.xz', 'text/plain', 'xz'),
                ('logs/controller/service_log.txt', 'text/plain', None),
                ('logs/controller/syslog', 'text/plain', None),
                ('logs/controller/subdir/foo::3.txt', 'text/plain', None),
                ('logs/controller/subdir/subdir.txt', 'text/plain', None),
                ('logs/zuul-info/inventory.yaml', 'text/plain', None),
                ('logs/zuul-info/zuul-info.controller.txt',
                 'text/plain', None),
            ])

    def test_single_file(self):
        '''Test a single file'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR,
                                'logs/zuul-info/inventory.yaml'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('inventory.yaml', 'text/plain', None),
            ])

    def test_symlinks(self):
        '''Test symlinks'''
        with FileList() as fl:
            self.useFixture(SymlinkFixture())
            fl.add(os.path.join(FIXTURE_DIR, 'links/'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('controller', 'application/directory', None),
                ('good_symlink', 'application/directory', None),
                ('recursive_symlink', 'application/directory', None),
                ('symlink_loop', 'application/directory', None),
                ('symlink_loop_a', 'application/directory', None),
                ('job-output.json', 'application/json', None),
                ('symlink_file', 'text/plain', None),
                ('controller/service_log.txt', 'text/plain', None),
                ('symlink_loop/symlink_loop_b',
                 'application/directory', None),
                ('symlink_loop/placeholder', 'text/plain', None),
            ])

    def test_index_files(self):
        '''Test index generation'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs'))
            ix = Indexer(fl)
            ix.make_indexes()

            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('index.html', 'text/html', None),
                ('logs', 'application/directory', None),
                ('logs/controller', 'application/directory', None),
                ('logs/zuul-info', 'application/directory', None),
                ('logs/job-output.json', 'application/json', None),
                ('logs/index.html', 'text/html', None),
                ('logs/controller/subdir', 'application/directory', None),
                ('logs/controller/compressed.gz', 'text/plain', 'gzip'),
                ('logs/controller/cpu-load.svg', 'image/svg+xml', None),
                ('logs/controller/journal.xz', 'text/plain', 'xz'),
                ('logs/controller/service_log.txt', 'text/plain', None),
                ('logs/controller/syslog', 'text/plain', None),
                ('logs/controller/index.html', 'text/html', None),
                ('logs/controller/subdir/foo::3.txt', 'text/plain', None),
                ('logs/controller/subdir/subdir.txt', 'text/plain', None),
                ('logs/controller/subdir/index.html', 'text/html', None),
                ('logs/zuul-info/inventory.yaml', 'text/plain', None),
                ('logs/zuul-info/zuul-info.controller.txt',
                 'text/plain', None),
                ('logs/zuul-info/index.html', 'text/html', None),
            ])

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 1)
            self._assert_link(rows[0], 'logs/index.html', 'logs/')

            rows = self._index_rows(
                fl, 'logs/controller/subdir/index.html')
            self._assert_subdir_rows(rows)

    def test_index_files_trailing_slash(self):
        '''Test index generation with a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes()

            self.assert_files(fl, self._INDEXED_LOGS_CONTENTS)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 3)
            self._assert_link(
                rows[0], 'controller/index.html', 'controller/')
            self._assert_link(
                rows[1], 'zuul-info/index.html', 'zuul-info/')

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_subdir_rows(rows)

    def test_topdir_parent_link(self):
        '''Test index generation creates topdir parent link'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes(
                create_parent_links=True,
                create_topdir_parent_link=True)

            self.assert_files(fl, self._INDEXED_LOGS_CONTENTS)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 4)
            self._assert_link(rows[0], '../index.html', '../')
            self._assert_link(
                rows[1], 'controller/index.html', 'controller/')
            self._assert_link(
                rows[2], 'zuul-info/index.html', 'zuul-info/')

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_subdir_rows(rows)

    def test_no_parent_links(self):
        '''Test index generation without any parent links'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes(
                create_parent_links=False,
                create_topdir_parent_link=False)

            self.assert_files(fl, self._INDEXED_LOGS_CONTENTS)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 3)
            self._assert_link(
                rows[0], 'controller/index.html', 'controller/')
            self._assert_link(
                rows[1], 'zuul-info/index.html', 'zuul-info/')

            # With parent links disabled the file rows come first.
            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_subdir_rows(rows, parent_link=False)
class TestFileDetail(testtools.TestCase):
    '''Checks of the metadata that FileDetail reports for a path.'''

    def test_get_file_detail(self):
        '''Test files info'''
        target = os.path.join(FIXTURE_DIR, 'logs/job-output.json')
        detail = FileDetail(target, '')

        # The reported mtime must match what the filesystem says.
        expected_mtime = time.gmtime(os.stat(target)[stat.ST_MTIME])
        self.assertEqual(expected_mtime, detail.last_modified)
        # The fixture file is expected to be 16 bytes long.
        self.assertEqual(16, detail.size)

    def test_get_file_detail_missing_file(self):
        '''Test files that go missing during a walk'''
        detail = FileDetail('missing/file/that/we/cant/find', '')

        # For a nonexistent path the test expects the epoch as the
        # modification time and a size of zero.
        epoch = time.gmtime(0)
        self.assertEqual(epoch, detail.last_modified)
        self.assertEqual(0, detail.size)