From 838b0c88774964fdfb41b9b5ab5e8820571e7a6f Mon Sep 17 00:00:00 2001 From: Albin Vass Date: Sat, 18 Jul 2020 17:42:54 +0200 Subject: [PATCH] Add upload-logs-s3 Change-Id: I6ce64734ed5f20a212e6cb953d09ea2769238bea --- doc/source/log-roles.rst | 1 + roles/upload-logs-s3/README.rst | 66 ++ roles/upload-logs-s3/__init__.py | 0 roles/upload-logs-s3/defaults/main.yaml | 3 + roles/upload-logs-s3/library/__init__.py | 0 .../test-fixtures/artifacts/foo.tar.gz | Bin 0 -> 115 bytes .../library/test-fixtures/artifacts/foo.tgz | Bin 0 -> 115 bytes .../library/test-fixtures/auth.json | 1 + .../links/controller/service_log.txt | 0 .../test-fixtures/links/job-output.json | 1 + .../links/symlink_loop/placeholder | 0 .../logs/controller/compressed.gz | Bin 0 -> 31 bytes .../logs/controller/cpu-load.svg | 3 + .../test-fixtures/logs/controller/journal.xz | Bin 0 -> 32 bytes .../logs/controller/service_log.txt | 0 .../logs/controller/subdir/foo::3.txt | 2 + .../logs/controller/subdir/subdir.txt | 0 .../test-fixtures/logs/controller/syslog | 0 .../test-fixtures/logs/job-output.json | 1 + .../logs/zuul-info/inventory.yaml | 0 .../logs/zuul-info/zuul-info.controller.txt | 0 .../library/test_zuul_s3_upload.py | 393 +++++++++ .../upload-logs-s3/library/zuul_s3_upload.py | 825 ++++++++++++++++++ roles/upload-logs-s3/tasks/main.yaml | 39 + test-playbooks/upload-logs-s3.yaml | 70 ++ test-requirements.txt | 3 + zuul-tests.d/logs-jobs.yaml | 10 + 27 files changed, 1418 insertions(+) create mode 100644 roles/upload-logs-s3/README.rst create mode 100644 roles/upload-logs-s3/__init__.py create mode 100644 roles/upload-logs-s3/defaults/main.yaml create mode 100644 roles/upload-logs-s3/library/__init__.py create mode 100644 roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tar.gz create mode 100644 roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tgz create mode 100644 roles/upload-logs-s3/library/test-fixtures/auth.json create mode 100644 
roles/upload-logs-s3/library/test-fixtures/links/controller/service_log.txt create mode 100644 roles/upload-logs-s3/library/test-fixtures/links/job-output.json create mode 100644 roles/upload-logs-s3/library/test-fixtures/links/symlink_loop/placeholder create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/compressed.gz create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/journal.xz create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/service_log.txt create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/subdir.txt create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/controller/syslog create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/job-output.json create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/inventory.yaml create mode 100644 roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/zuul-info.controller.txt create mode 100644 roles/upload-logs-s3/library/test_zuul_s3_upload.py create mode 100755 roles/upload-logs-s3/library/zuul_s3_upload.py create mode 100644 roles/upload-logs-s3/tasks/main.yaml create mode 100644 test-playbooks/upload-logs-s3.yaml diff --git a/doc/source/log-roles.rst b/doc/source/log-roles.rst index 6653b869f..908da080c 100644 --- a/doc/source/log-roles.rst +++ b/doc/source/log-roles.rst @@ -14,4 +14,5 @@ Log Roles .. zuul:autorole:: set-zuul-log-path-fact .. zuul:autorole:: upload-logs .. zuul:autorole:: upload-logs-gcs +.. zuul:autorole:: upload-logs-s3 .. 
zuul:autorole:: upload-logs-swift diff --git a/roles/upload-logs-s3/README.rst b/roles/upload-logs-s3/README.rst new file mode 100644 index 000000000..f49f2f50d --- /dev/null +++ b/roles/upload-logs-s3/README.rst @@ -0,0 +1,66 @@ +Upload logs to S3 + +Before using this role, create at least one bucket and set up +appropriate access controls or lifecycle events. This role will not +automatically create buckets. + +This role requires the ``boto3`` Python package to be +installed in the Ansible environment on the Zuul executor. + +**Role Variables** + +.. zuul:rolevar:: zuul_site_upload_logs + :default: true + + Controls when logs are uploaded. true, the default, means always + upload logs. false means never upload logs. 'failure' means to only + upload logs when the job has failed. + + .. note:: Intended to be set by admins via site-variables. + +.. zuul:rolevar:: zuul_log_partition + :default: false + + If set to true, then the first component of the log path will be + removed from the object name and added to the bucket name, so that + logs for different changes are distributed across a large number of + buckets. + +.. zuul:rolevar:: zuul_log_bucket + + This role *will not* create buckets which do not already exist. If + partitioning is not enabled, this is the name of the bucket which + will be used. If partitioning is enabled, then this will be used + as the prefix for the bucket name which will be separated from the + partition name by an underscore. For example, "logs_42" would be + the bucket name for partition 42. + + Note that you will want to set this to a value that uniquely + identifies your Zuul installation. + +.. zuul:rolevar:: zuul_log_path + :default: Generated by the role `set-zuul-log-path-fact` + + Prepend this path to the object names when uploading. + +.. zuul:rolevar:: zuul_log_create_indexes + :default: true + + Whether to create `index.html` files with directory indexes. + +.. 
zuul:rolevar:: zuul_log_path_shard_build + :default: false + + This var is consumed by set-zuul-log-path-fact which + upload-logs-s3 calls into. If you set this you will get log paths + prefixed with the first three characters of the build uuid. This + will improve log file sharding. + + More details can be found at + :zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`. + + +.. zuul:rolevar:: upload_logs_s3_endpoint + + The endpoint to use when uploading logs to an s3 compatible service. + By default this will be automatically constructed by boto but should be set when working with non-aws hosted s3 service. diff --git a/roles/upload-logs-s3/__init__.py b/roles/upload-logs-s3/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/defaults/main.yaml b/roles/upload-logs-s3/defaults/main.yaml new file mode 100644 index 000000000..65ec9198b --- /dev/null +++ b/roles/upload-logs-s3/defaults/main.yaml @@ -0,0 +1,3 @@ +zuul_log_partition: false +zuul_log_create_indexes: true + diff --git a/roles/upload-logs-s3/library/__init__.py b/roles/upload-logs-s3/library/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tar.gz b/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9b1579d90d8178e9a1196bc1b4d87b032cc2eeb2 GIT binary patch literal 115 zcmb2|=3wAb4T@%9etXW4i^-6I<$#T<{)CuG#_t6WniWhJp1;1SsW)QoRiDjA^EOVu z$N$*u(7a2BH(pEk%k`W8Q*2pK(OS#t*PLb*ol-rr<@M_6xw6Yuj}^Q-T^harece|E R82I2`e(ByZSq2RT1_1P;G1344 literal 0 HcmV?d00001 diff --git a/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tgz b/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tgz new file mode 100644 index 0000000000000000000000000000000000000000..ca9fccb9934364912861d8ed720ca0efb9cab910 GIT binary patch literal 115 zcmb2|=3rn~4T@%9etXW4i^-6I<$#T<{)CuG#_t6WniWhJp1;1SsW)QoRiDjA^EOVu 
z$N$*u(7a2BH(pEk%k`W8Q*2pK(OS#t*PLb*ol-rr<@M_6xw6Yuj}^Q-T^harece|E R82I2`e(ByZSq2RT1_1L_G0y-1 literal 0 HcmV?d00001 diff --git a/roles/upload-logs-s3/library/test-fixtures/auth.json b/roles/upload-logs-s3/library/test-fixtures/auth.json new file mode 100644 index 000000000..8df1606b6 --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/auth.json @@ -0,0 +1 @@ +{"access_token": "something", "expires_in": 3599, "token_type": "Bearer"} diff --git a/roles/upload-logs-s3/library/test-fixtures/links/controller/service_log.txt b/roles/upload-logs-s3/library/test-fixtures/links/controller/service_log.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/links/job-output.json b/roles/upload-logs-s3/library/test-fixtures/links/job-output.json new file mode 100644 index 000000000..c8cd7e92d --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/links/job-output.json @@ -0,0 +1 @@ +{"test": "foo"} diff --git a/roles/upload-logs-s3/library/test-fixtures/links/symlink_loop/placeholder b/roles/upload-logs-s3/library/test-fixtures/links/symlink_loop/placeholder new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/compressed.gz b/roles/upload-logs-s3/library/test-fixtures/logs/controller/compressed.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dc3bad6630cb3a93498446472cd7636cc572230 GIT binary patch literal 31 fcmb2|=HRFk3yNl7PR`FQC`v6ZPEBE8W`F?zd)@{& literal 0 HcmV?d00001 diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg b/roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg new file mode 100644 index 000000000..01a940a25 --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg @@ -0,0 +1,3 @@ + + + diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/journal.xz 
b/roles/upload-logs-s3/library/test-fixtures/logs/controller/journal.xz new file mode 100644 index 0000000000000000000000000000000000000000..ea28d9e05f69d458ccfaf4aa985d30d366bfbc25 GIT binary patch literal 32 kcmexsUKJ6=z`*kC+7>qkAdtE5qA0)Zb1fr?!x9+<0JCTbcmMzZ literal 0 HcmV?d00001 diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/service_log.txt b/roles/upload-logs-s3/library/test-fixtures/logs/controller/service_log.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt b/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt new file mode 100644 index 000000000..384ce7d7f --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt @@ -0,0 +1,2 @@ +This is a plan text file with a funny name. +The index links should escape the :'s. diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/subdir.txt b/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/subdir.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/syslog b/roles/upload-logs-s3/library/test-fixtures/logs/controller/syslog new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/job-output.json b/roles/upload-logs-s3/library/test-fixtures/logs/job-output.json new file mode 100644 index 000000000..c8cd7e92d --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/logs/job-output.json @@ -0,0 +1 @@ +{"test": "foo"} diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/inventory.yaml b/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/inventory.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/zuul-info.controller.txt 
class SymlinkFixture(fixtures.Fixture):
    '''Fixture that populates ``test-fixtures/links`` with symlinks.

    Each ``(name, target)`` pair in ``links`` becomes a symlink called
    ``name`` (relative to the ``links`` fixture directory) pointing at
    ``target``.  Every link is registered for removal as soon as it has
    been created, so the fixture tree is left untouched after the test.
    '''

    links = [
        ('bad_symlink', '/etc'),
        ('bad_symlink_file', '/etc/issue'),
        ('good_symlink', 'controller'),
        ('recursive_symlink', '.'),
        ('symlink_file', 'job-output.json'),
        ('symlink_loop_a', 'symlink_loop'),
        ('symlink_loop/symlink_loop_b', '..'),
    ]

    def _setUp(self):
        links_dir = os.path.join(FIXTURE_DIR, 'links')
        for link_name, destination in self.links:
            link_path = os.path.join(links_dir, link_name)
            os.symlink(destination, link_path)
            # Register the cleanup per link so a failure part-way through
            # still removes the links that were already created.
            self.addCleanup(os.unlink, link_path)
class TestFileList(testtools.TestCase):
    '''Tests for FileList traversal and Indexer index generation.

    Each test points a FileList at the on-disk ``test-fixtures`` tree and
    compares the entries it yields, in order, against an expected listing
    of ``(relative_path, mimetype, encoding)`` tuples.
    '''

    # Listing expected for the ``logs/`` fixture (added with a trailing
    # slash) once indexes have been generated.  The tests that share it
    # differ only in the parent-link options passed to make_indexes().
    LOGS_WITH_INDEXES = [
        ('', 'application/directory', None),
        ('controller', 'application/directory', None),
        ('zuul-info', 'application/directory', None),
        ('job-output.json', 'application/json', None),
        ('index.html', 'text/html', None),
        ('controller/subdir', 'application/directory', None),
        ('controller/compressed.gz', 'text/plain', 'gzip'),
        ('controller/cpu-load.svg', 'image/svg+xml', None),
        ('controller/journal.xz', 'text/plain', 'xz'),
        ('controller/service_log.txt', 'text/plain', None),
        ('controller/syslog', 'text/plain', None),
        ('controller/index.html', 'text/html', None),
        ('controller/subdir/foo::3.txt', 'text/plain', None),
        ('controller/subdir/subdir.txt', 'text/plain', None),
        ('controller/subdir/index.html', 'text/html', None),
        ('zuul-info/inventory.yaml', 'text/plain', None),
        ('zuul-info/zuul-info.controller.txt', 'text/plain', None),
        ('zuul-info/index.html', 'text/html', None),
    ]

    def assert_files(self, result, files):
        '''Assert that ``result`` matches the expected ``files`` listing.

        Args:
            result: Sequence of objects exposing ``relative_path``,
                ``filename``, ``folder``, ``full_path``, ``mimetype``
                and ``encoding``.
            files: Expected ``(relative_path, mimetype, encoding)``
                tuples, in the same order as ``result``.
        '''
        self.assertEqual(len(result), len(files))
        for expected, received in zip(files, result):
            self.assertEqual(expected[0], received.relative_path)
            if expected[0] and expected[0][-1] == '/':
                efilename = os.path.split(
                    os.path.dirname(expected[0]))[1] + '/'
            else:
                efilename = os.path.split(expected[0])[1]
            self.assertEqual(efilename, received.filename)
            if received.folder:
                # The root entry ('') is not checked against the disk.
                if received.full_path is not None and expected[0] != '':
                    self.assertTrue(os.path.isdir(received.full_path))
            else:
                self.assertTrue(os.path.isfile(received.full_path))
            self.assertEqual(expected[1], received.mimetype)
            self.assertEqual(expected[2], received.encoding)

    def find_file(self, file_list, path):
        '''Return the entry whose relative path is ``path``, else None.'''
        for f in file_list:
            if f.relative_path == path:
                return f
        return None

    def _index_rows(self, file_list, path):
        '''Parse the index at ``path`` and return its table rows.

        The first ``<tr>`` is skipped (presumably the table header --
        the Indexer template is not part of this test module).  The file
        is read inside a ``with`` block so the handle is closed
        deterministically instead of being left to the garbage collector.
        '''
        index = self.find_file(file_list, path)
        with open(index.full_path) as index_file:
            page = BeautifulSoup(index_file.read(), 'html.parser')
        return page.find_all('tr')[1:]

    def _assert_link(self, row, href, text):
        '''Assert that the first anchor in ``row`` has this href and text.'''
        anchor = row.find('a')
        self.assertEqual(anchor.get('href'), href)
        self.assertEqual(anchor.text, text)

    def test_single_dir_trailing_slash(self):
        '''Test a single directory with a trailing slash'''

        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('controller', 'application/directory', None),
                ('zuul-info', 'application/directory', None),
                ('job-output.json', 'application/json', None),
                ('controller/subdir', 'application/directory', None),
                ('controller/compressed.gz', 'text/plain', 'gzip'),
                ('controller/cpu-load.svg', 'image/svg+xml', None),
                ('controller/journal.xz', 'text/plain', 'xz'),
                ('controller/service_log.txt', 'text/plain', None),
                ('controller/syslog', 'text/plain', None),
                ('controller/subdir/foo::3.txt', 'text/plain', None),
                ('controller/subdir/subdir.txt', 'text/plain', None),
                ('zuul-info/inventory.yaml', 'text/plain', None),
                ('zuul-info/zuul-info.controller.txt', 'text/plain', None),
            ])

    def test_single_dir(self):
        '''Test a single directory without a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('logs', 'application/directory', None),
                ('logs/controller', 'application/directory', None),
                ('logs/zuul-info', 'application/directory', None),
                ('logs/job-output.json', 'application/json', None),
                ('logs/controller/subdir', 'application/directory', None),
                ('logs/controller/compressed.gz', 'text/plain', 'gzip'),
                ('logs/controller/cpu-load.svg', 'image/svg+xml', None),
                ('logs/controller/journal.xz', 'text/plain', 'xz'),
                ('logs/controller/service_log.txt', 'text/plain', None),
                ('logs/controller/syslog', 'text/plain', None),
                ('logs/controller/subdir/foo::3.txt', 'text/plain', None),
                ('logs/controller/subdir/subdir.txt', 'text/plain', None),
                ('logs/zuul-info/inventory.yaml', 'text/plain', None),
                ('logs/zuul-info/zuul-info.controller.txt',
                 'text/plain', None),
            ])

    def test_single_file(self):
        '''Test a single file'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR,
                                'logs/zuul-info/inventory.yaml'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('inventory.yaml', 'text/plain', None),
            ])

    def test_symlinks(self):
        '''Test symlinks'''
        with FileList() as fl:
            self.useFixture(SymlinkFixture())
            fl.add(os.path.join(FIXTURE_DIR, 'links/'))
            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('controller', 'application/directory', None),
                ('good_symlink', 'application/directory', None),
                ('recursive_symlink', 'application/directory', None),
                ('symlink_loop', 'application/directory', None),
                ('symlink_loop_a', 'application/directory', None),
                ('job-output.json', 'application/json', None),
                ('symlink_file', 'text/plain', None),
                ('controller/service_log.txt', 'text/plain', None),
                ('symlink_loop/symlink_loop_b', 'application/directory', None),
                ('symlink_loop/placeholder', 'text/plain', None),
            ])

    def test_index_files(self):
        '''Test index generation'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs'))
            ix = Indexer(fl)
            ix.make_indexes()

            self.assert_files(fl, [
                ('', 'application/directory', None),
                ('index.html', 'text/html', None),
                ('logs', 'application/directory', None),
                ('logs/controller', 'application/directory', None),
                ('logs/zuul-info', 'application/directory', None),
                ('logs/job-output.json', 'application/json', None),
                ('logs/index.html', 'text/html', None),
                ('logs/controller/subdir', 'application/directory', None),
                ('logs/controller/compressed.gz', 'text/plain', 'gzip'),
                ('logs/controller/cpu-load.svg', 'image/svg+xml', None),
                ('logs/controller/journal.xz', 'text/plain', 'xz'),
                ('logs/controller/service_log.txt', 'text/plain', None),
                ('logs/controller/syslog', 'text/plain', None),
                ('logs/controller/index.html', 'text/html', None),
                ('logs/controller/subdir/foo::3.txt', 'text/plain', None),
                ('logs/controller/subdir/subdir.txt', 'text/plain', None),
                ('logs/controller/subdir/index.html', 'text/html', None),
                ('logs/zuul-info/inventory.yaml', 'text/plain', None),
                ('logs/zuul-info/zuul-info.controller.txt',
                 'text/plain', None),
                ('logs/zuul-info/index.html', 'text/html', None),
            ])

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 1)
            self._assert_link(rows[0], 'logs/index.html', 'logs/')

            rows = self._index_rows(
                fl, 'logs/controller/subdir/index.html')
            self._assert_link(rows[0], '../index.html', '../')
            # Test proper escaping of files with funny names
            self._assert_link(rows[1], 'foo%3A%3A3.txt', 'foo::3.txt')
            # Test files without escaping
            self._assert_link(rows[2], 'subdir.txt', 'subdir.txt')

    def test_index_files_trailing_slash(self):
        '''Test index generation with a trailing slash'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes()

            self.assert_files(fl, self.LOGS_WITH_INDEXES)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 3)
            self._assert_link(rows[0], 'controller/index.html',
                              'controller/')
            self._assert_link(rows[1], 'zuul-info/index.html',
                              'zuul-info/')

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_link(rows[0], '../index.html', '../')
            # Test proper escaping of files with funny names
            self._assert_link(rows[1], 'foo%3A%3A3.txt', 'foo::3.txt')
            # Test files without escaping
            self._assert_link(rows[2], 'subdir.txt', 'subdir.txt')

    def test_topdir_parent_link(self):
        '''Test index generation creates topdir parent link'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes(
                create_parent_links=True,
                create_topdir_parent_link=True)

            self.assert_files(fl, self.LOGS_WITH_INDEXES)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 4)
            self._assert_link(rows[0], '../index.html', '../')
            self._assert_link(rows[1], 'controller/index.html',
                              'controller/')
            self._assert_link(rows[2], 'zuul-info/index.html',
                              'zuul-info/')

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            self._assert_link(rows[0], '../index.html', '../')
            # Test proper escaping of files with funny names
            self._assert_link(rows[1], 'foo%3A%3A3.txt', 'foo::3.txt')
            # Test files without escaping
            self._assert_link(rows[2], 'subdir.txt', 'subdir.txt')

    def test_no_parent_links(self):
        '''Test index generation without any parent links'''
        with FileList() as fl:
            fl.add(os.path.join(FIXTURE_DIR, 'logs/'))
            ix = Indexer(fl)
            ix.make_indexes(
                create_parent_links=False,
                create_topdir_parent_link=False)

            self.assert_files(fl, self.LOGS_WITH_INDEXES)

            rows = self._index_rows(fl, 'index.html')
            self.assertEqual(len(rows), 3)
            self._assert_link(rows[0], 'controller/index.html',
                              'controller/')
            self._assert_link(rows[1], 'zuul-info/index.html',
                              'zuul-info/')

            rows = self._index_rows(fl, 'controller/subdir/index.html')
            # With parent links disabled the first row is a real file.
            # Test proper escaping of files with funny names
            self._assert_link(rows[0], 'foo%3A%3A3.txt', 'foo::3.txt')
            # Test files without escaping
            self._assert_link(rows[1], 'subdir.txt', 'subdir.txt')
class TestFileDetail(testtools.TestCase):
    '''Checks the stat-derived metadata recorded by FileDetail.'''

    def test_get_file_detail(self):
        '''Test files info'''
        fixture = os.path.join(FIXTURE_DIR, 'logs/job-output.json')
        detail = FileDetail(fixture, '')
        expected_mtime = time.gmtime(os.stat(fixture)[stat.ST_MTIME])
        self.assertEqual(expected_mtime, detail.last_modified)
        # The fixture holds '{"test": "foo"}' plus a trailing newline.
        self.assertEqual(16, detail.size)

    def test_get_file_detail_missing_file(self):
        '''Test files that go missing during a walk'''
        detail = FileDetail('missing/file/that/we/cant/find', '')
        epoch = time.gmtime(0)
        # A path that cannot be stat'ed falls back to epoch / zero size.
        self.assertEqual(epoch, detail.last_modified)
        self.assertEqual(0, detail.size)
mimetypes.init()
# Have .yaml files guessed as plain text.
mimetypes.add_type('text/plain', '.yaml')

# NOTE(review): presumably the upper bound on concurrent upload workers;
# the code that consumes it is outside this excerpt -- confirm.
MAX_UPLOAD_THREADS = 24
# Maximum number of times retry_function() calls its callable before
# re-raising the last exception.
POST_ATTEMPTS = 3

# Map mime types to apache icons ('_default' is the fallback icon)
APACHE_MIME_ICON_MAP = {
    '_default': 'unknown.png',
    'application/gzip': 'compressed.png',
    'application/directory': 'folder.png',
    'text/html': 'text.png',
    'text/plain': 'text.png',
}

# Map file names to apache icons; get_mime_icon() consults this map
# before falling back to the mime type map.
APACHE_FILE_ICON_MAP = {
    '..': 'back.png',
}
+ICON_IMAGES = { + 'back.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/' + '///M//+ZmZlmZmYzMzMAAACei5rnAAAAAnRSTlP/AOW3MEoAAABWSURB' + 'VHjabdBBCgAhDEPRRpv7X3kwEMsQ//IRRC08urjRHbha5VLFUsVSxVI9' + 'lmDh5hMpHD6n0EgoiZG0DNINpnWlcVXaRix76e1/8dddcL6nG0Ri9gHj' + 'tgSXKYeLBgAAAABJRU5ErkJggg==', + 'compressed.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAADAFBM' + 'VEX//////8z//5n//2b//zP//wD/zP//zMz/zJn/zGb/zDP/zAD/' + 'mf//mcz/mZn/mWb/mTP/mQD/Zv//Zsz/Zpn/Zmb/ZjP/ZgD/M///' + 'M8z/M5n/M2b/MzP/MwD/AP//AMz/AJn/AGb/ADP/AADM///M/8zM' + '/5nM/2bM/zPM/wDMzP/MzMzMzJnMzGbMzDPMzADMmf/MmczMmZnM' + 'mWbMmTPMmQDMZv/MZszMZpnMZmbMZjPMZgDMM//MM8zMM5nMM2bM' + 'MzPMMwDMAP/MAMzMAJnMAGbMADPMAACZ//+Z/8yZ/5mZ/2aZ/zOZ' + '/wCZzP+ZzMyZzJmZzGaZzDOZzACZmf+ZmcyZmZmZmWaZmTOZmQCZ' + 'Zv+ZZsyZZpmZZmaZZjOZZgCZM/+ZM8yZM5mZM2aZMzOZMwCZAP+Z' + 'AMyZAJmZAGaZADOZAABm//9m/8xm/5lm/2Zm/zNm/wBmzP9mzMxm' + 'zJlmzGZmzDNmzABmmf9mmcxmmZlmmWZmmTNmmQBmZv9mZsxmZplm' + 'ZmZmZjNmZgBmM/9mM8xmM5lmM2ZmMzNmMwBmAP9mAMxmAJlmAGZm' + 'ADNmAAAz//8z/8wz/5kz/2Yz/zMz/wAzzP8zzMwzzJkzzGYzzDMz' + 'zAAzmf8zmcwzmZkzmWYzmTMzmQAzZv8zZswzZpkzZmYzZjMzZgAz' + 'M/8zM8wzM5kzM2YzMzMzMwAzAP8zAMwzAJkzAGYzADMzAAAA//8A' + '/8wA/5kA/2YA/zMA/wAAzP8AzMwAzJkAzGYAzDMAzAAAmf8AmcwA' + 'mZkAmWYAmTMAmQAAZv8AZswAZpkAZmYAZjMAZgAAM/8AM8wAM5kA' + 'M2YAMzMAMwAAAP8AAMwAAJkAAGYAADPuAADdAAC7AACqAACIAAB3' + 'AABVAABEAAAiAAARAAAA7gAA3QAAuwAAqgAAiAAAdwAAVQAARAAA' + 'IgAAEQAAAO4AAN0AALsAAKoAAIgAAHcAAFUAAEQAACIAABHu7u7d' + '3d27u7uqqqqIiIh3d3dVVVVEREQiIiIREREAAAD7CIKZAAAAJXRS' + 'TlP///////////////////////////////////////////////8A' + 'P89CTwAAAGtJREFUeNp9z9ENgDAIhOEOco+dybVuEXasFMRDY/x5' + '+xJCO6Znu6kSx7BhXyjtKBWWNlwW88Loid7hFRKBXiIYCMfMEYUQ' + 'QohC3CjFA5nIjqx1CqlDLGR/EhM5O06yvin0ftGOyIS7lV14AsQN' + 'aR7rMEBYAAAAAElFTkSuQmCC', + 'folder.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/' + '////zJnM//+ZZjMzMzMAAADCEvqoAAAAA3RSTlP//wDXyg1BAAAASElE' + 'QVR42s3KQQ6AQAhDUaXt/a/sQDrRJu7c+NmQB0e99B3lnqjT6cYx6zSI' + 
class GZIPCompressedStream(io.RawIOBase):
    """Readable raw stream yielding the gzip-compressed form of another.

    Data is pulled from the wrapped ``stream`` on demand, pushed through
    a ``gzip.GzipFile`` that writes into an in-memory buffer, and handed
    to the reader from that buffer, so the whole input never has to be
    held in memory at once.

    Args:
        stream: Object with a ``read(size)`` method returning bytes.
        compression_level (int): gzip level between 1 and 9 (default 9).
    """

    def __init__(self, stream, compression_level=9):
        assert 1 <= compression_level <= 9

        self._stream = stream
        self._compression_level = compression_level

        # Staging buffer the compressor writes into and readers drain.
        self._compressed_stream = io.BytesIO()
        self._compressor = gzip.GzipFile(
            fileobj=self._compressed_stream,
            mode='wb',
            compresslevel=compression_level
        )

        # because of the GZIP header written by `GzipFile.__init__`:
        self._compressed_stream.seek(0)

    @property
    def compression_level(self):
        return self._compression_level

    @property
    def stream(self):
        return self._stream

    def readable(self):
        return True

    def _read_compressed_into(self, b):
        """Copy pending compressed bytes into ``b``; return the count."""
        chunk = self._compressed_stream.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def readinto(self, b):
        """Fill ``b`` with compressed data; return the bytes written.

        A return value of 0 means the source is exhausted and the final
        gzip data has already been delivered.
        """
        target = memoryview(b)
        wanted = len(target)
        filled = 0

        while filled < wanted:
            filled += self._read_compressed_into(target[filled:])
            if filled >= wanted:
                break
            # the staging buffer is now empty
            if self._compressor.closed:
                # nothing to compress anymore
                break
            # compress next bytes
            self._read_n_compress(wanted)

        return filled

    def _read_n_compress(self, size):
        """Read up to ``size`` source bytes and compress them."""
        assert size > 0

        data = self._stream.read(size)

        # rewind buffer to the start to free up memory
        # (because anything currently in the buffer should be already
        # streamed off the object)
        self._compressed_stream.seek(0)
        self._compressed_stream.truncate(0)

        if not data:
            # this will write final data (will flush zlib with Z_FINISH)
            self._compressor.close()
        else:
            self._compressor.write(data)

        # rewind to the buffer start
        self._compressed_stream.seek(0)

    def __repr__(self):
        template = (
            '{self.__class__.__name__}('
            '{self.stream!r}, '
            'compression_level={self.compression_level!r}'
            ')'
        )
        return template.format(self=self)
class FileDetail():
    """
    Used to generate indexes with links or as the file path
    to push to s3.
    """

    def __init__(self, full_path, relative_path, filename=None):
        """
        Args:
            full_path (str): The absolute path to the file on disk. May be
                             None for synthetic entries, which are then
                             treated as folders with size 0.
            relative_path (str): The relative path from the artifacts source
                                 used for links.
            filename (str): An optional alternate filename in links.
        """
        self.full_path = full_path
        if filename is None:
            self.filename = os.path.basename(full_path)
        else:
            self.filename = filename
        self.relative_path = relative_path

        if self.full_path and os.path.isfile(self.full_path):
            mime_guess, encoding = mimetypes.guess_type(self.full_path)
            self.mimetype = mime_guess if mime_guess else 'text/plain'
            self.encoding = encoding
            self.folder = False
        else:
            # Anything that is not an existing regular file (including a
            # full_path of None) is presented as a directory.
            self.mimetype = 'application/directory'
            self.encoding = None
            self.folder = True
        try:
            st = os.stat(self.full_path)
            self.last_modified = time.gmtime(st[stat.ST_MTIME])
            self.size = st[stat.ST_SIZE]
        except (OSError, TypeError):
            # OSError covers a missing path on both python2 and python3
            # (FileNotFoundError is a subclass of it); TypeError covers a
            # full_path of None.
            self.last_modified = time.gmtime(0)
            self.size = 0

    def __repr__(self):
        t = 'Folder' if self.folder else 'File'
        return '<%s %s>' % (t, self.relative_path)


class FileList(Sequence):
    '''A collection of FileDetail objects

    This is a list-like group of FileDetail objects, intended to be
    used as a context manager around the upload process.  The list
    always starts with a pseudo top-level directory entry whose
    full_path is None.
    '''
    def __init__(self):
        self.file_list = []
        self.file_list.append(FileDetail(None, '', ''))
        self.tempdirs = []

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Remove every temporary directory handed out by get_tempdir().
        for tempdir in self.tempdirs:
            shutil.rmtree(tempdir)

    def __getitem__(self, item):
        return self.file_list.__getitem__(item)

    def __len__(self):
        return self.file_list.__len__()

    def get_tempdir(self):
        '''Get a temporary directory

        Returns path to a private temporary directory which will be
        cleaned on exit
        '''
        tempdir = tempfile.mkdtemp(prefix='s-u-l-tmp')
        self.tempdirs.append(tempdir)
        return tempdir

    @staticmethod
    def _path_in_tree(root, path):
        '''Check that path resolves to a location inside root

        Args:
            root (str): The resolved directory that must contain the path.
            path (str): The path to check; symlinks are resolved first.

        Returns:
            True if the resolved path is root itself or lies below it,
            False otherwise.
        '''
        full_path = os.path.realpath(os.path.abspath(
            os.path.expanduser(path)))
        # Compare on a path-separator boundary.  A plain startswith(root)
        # would accept a sibling such as "/logs-other" for root "/logs".
        root_prefix = root.rstrip(os.sep) + os.sep
        if full_path != root and not full_path.startswith(root_prefix):
            logging.debug("Skipping path outside root: %s" % (path,))
            return False
        return True

    def add(self, file_path):
        """
        Generate a list of files to upload to s3. Recurses through
        directories

        Args:
            file_path (str): A file or directory.  For a directory given
                without a trailing '/', an entry for the directory itself
                is added before its contents.
        """

        # file_list: A list of FileDetails to push to s3
        file_list = []

        if os.path.isfile(file_path):
            relative_path = os.path.basename(file_path)
            file_list.append(FileDetail(file_path, relative_path))
        elif os.path.isdir(file_path):
            original_root = os.path.realpath(os.path.abspath(
                os.path.expanduser(file_path)))

            parent_dir = os.path.dirname(file_path)
            if not file_path.endswith('/'):
                filename = os.path.basename(file_path)
                full_path = file_path
                relative_name = os.path.relpath(full_path, parent_dir)
                file_list.append(FileDetail(full_path, relative_name,
                                            filename))
            # TODO: this will copy the result of symlinked files, but
            # it won't follow directory symlinks.  If we add that, we
            # should ensure that we don't loop.
            for path, folders, files in os.walk(file_path):
                # Sort folder in-place so that we recurse in order.
                files.sort(key=lambda x: x.lower())
                folders.sort(key=lambda x: x.lower())

                for filename in folders:
                    full_path = os.path.join(path, filename)
                    if not self._path_in_tree(original_root, full_path):
                        continue
                    relative_name = os.path.relpath(full_path, parent_dir)
                    file_list.append(FileDetail(full_path, relative_name,
                                                filename))

                for filename in files:
                    full_path = os.path.join(path, filename)
                    if not self._path_in_tree(original_root, full_path):
                        continue
                    relative_name = os.path.relpath(full_path, parent_dir)
                    file_detail = FileDetail(full_path, relative_name)
                    file_list.append(file_detail)

        self.file_list += file_list

%s

\n' % title + output += '' + output += '' + + file_details_to_append = None + for file_details in folder_links: + output += '' + output += ( + '' % ({ + 'm': file_details.mimetype, + 'i': get_mime_icon(file_details.mimetype, + file_details.filename), + })) + filename = file_details.filename + link_filename = filename + if file_details.folder: + filename += '/' + link_filename += '/index.html' + output += '' % ( + urlparse.quote(link_filename), + filename) + output += '' % time.asctime( + file_details.last_modified) + size = sizeof_fmt(file_details.size, suffix='') + output += '' % size + output += '\n' + + if (append_footer and + append_footer in file_details.filename): + file_details_to_append = file_details + + output += '
NameLast ModifiedSize
[ ]%s%s%s
' + + if file_details_to_append: + output += '

' + try: + with open(file_details_to_append.full_path, 'r') as f: + output += f.read() + except IOError: + logging.exception("Error opening file for appending") + + output += '\n' + return output + + def make_indexes(self, create_parent_links=True, + create_topdir_parent_link=False, + append_footer='index_footer.html'): + '''Make index.html files + + Iterate the file list and crete index.html files for folders + + Args: + create_parent_links (bool): Create parent links + create_topdir_parent_link (bool): Create topdir parent link + append_footer (str): Filename of a footer to append to each + generated page + + Return: + No value, the self.file_list will be updated + ''' + self.index_filename = 'index.html' + + folders = collections.OrderedDict() + for f in self.file_list: + if f.folder: + folders[f.relative_path] = [] + folder = os.path.dirname(os.path.dirname( + f.relative_path + '/')) + if folder == '/': + folder = '' + else: + folder = os.path.dirname(f.relative_path) + folders[folder].append(f) + + indexes = {} + parent_file_detail = FileDetail(None, '..', '..') + for folder, files in folders.items(): + # Don't add the pseudo-top-directory + if files and files[0].full_path is None: + files = files[1:] + if create_topdir_parent_link: + files = [parent_file_detail] + files + elif create_parent_links: + files = [parent_file_detail] + files + + # Do generate a link to the parent directory + full_path = self._make_index_file(files, 'Index of %s' % (folder,), + self.file_list.get_tempdir(), + append_footer) + + if full_path: + filename = os.path.basename(full_path) + relative_name = os.path.join(folder, filename) + indexes[folder] = FileDetail(full_path, relative_name) + + # This appends the index file at the end of the group of files + # for each directory. 
class Uploader():
    """Upload the entries of a file list to an S3 bucket.

    Files are sent from a pool of worker threads; each worker records
    per-file errors instead of aborting so that as many files as possible
    are uploaded.
    """

    def __init__(self, bucket, endpoint=None, prefix=None,
                 dry_run=False, aws_access_key=None, aws_secret_key=None):
        """
        Args:
            bucket (str): Name of the bucket to upload into.
            endpoint (str): URL of the S3 service; defaults to
                            'https://s3.amazonaws.com/' when not given.
            prefix (str): Path prepended to every object key.
            dry_run (bool): When true no S3 client is created and
                            upload() does nothing.
            aws_access_key (str): Access key passed to boto3.
            aws_secret_key (str): Secret key passed to boto3.
        """
        self.dry_run = dry_run
        # Set before the dry-run return so the attribute always exists.
        self.prefix = prefix or ''
        if dry_run:
            self.url = 'http://dry-run-url.com/a/path/'
            return

        if endpoint:
            self.endpoint = endpoint
        else:
            self.endpoint = 'https://s3.amazonaws.com/'

        self.url = os.path.join(self.endpoint,
                                bucket, self.prefix)

        self.s3 = boto3.resource('s3',
                                 endpoint_url=self.endpoint,
                                 aws_access_key_id=aws_access_key,
                                 aws_secret_access_key=aws_secret_key)
        self.bucket = self.s3.Bucket(bucket)

    def upload(self, file_list):
        """Spin up thread pool to upload to storage

        Args:
            file_list: A sized iterable of file detail objects.

        Returns:
            A list of dicts with the keys "file" and "error", one per file
            that could not be uploaded.  Empty on success and in dry-run
            mode.
        """

        if self.dry_run:
            return []

        num_threads = min(len(file_list), MAX_UPLOAD_THREADS)
        threads = []
        queue = queuelib.Queue()
        # add items to queue
        for f in file_list:
            queue.put(f)

        failures = []
        for x in range(num_threads):
            t = threading.Thread(target=self.post_thread, args=(queue,
                                                                failures))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        return failures

    def post_thread(self, queue, failures):
        """Worker loop: upload queued files until the queue is empty.

        Args:
            queue: Queue of file detail objects to upload.
            failures (list): Shared list that receives one
                {"file": ..., "error": ...} dict per failed file.
        """
        while True:
            try:
                file_detail = queue.get_nowait()
            except queuelib.Empty:
                # No more work to do
                return

            logging.debug("%s: processing job %s",
                          threading.current_thread(),
                          file_detail)
            try:
                retry_function(lambda: self._post_file(file_detail))
            except IOError as e:
                # Do our best to attempt to upload all the files
                logging.exception("Error opening file")
                failures.append({
                    "file": file_detail.filename,
                    "error": "{}".format(e)
                })
            except Exception as e:
                # Log the traceback as well as recording the failure, so
                # the cause is not reduced to the exception message.
                logging.exception("Error uploading file")
                failures.append({
                    "file": file_detail.filename,
                    "error": "{}".format(e)
                })

    @staticmethod
    def _is_text_type(mimetype):
        """Return True for mimetypes that should be gzip-compressed."""
        # We want to compress all text types.
        if mimetype.startswith('text/'):
            return True

        # Further compress types that typically contain text but are no
        # text sub type.
        compress_types = [
            'application/json',
            'image/svg+xml',
        ]
        if mimetype in compress_types:
            return True
        return False

    def _post_file(self, file_detail):
        """Upload a single file to the bucket.

        Args:
            file_detail: Object providing folder, encoding, mimetype,
                         filename, full_path and relative_path.
        """
        if file_detail.folder:
            # A folder entry has no file content to open; only regular
            # files are sent.
            return

        relative_path = os.path.join(self.prefix, file_detail.relative_path)

        # Only pass ContentEncoding when there is a value for it, rather
        # than sending an explicit None.
        extra_args = dict(ContentType=file_detail.mimetype)

        compress = (file_detail.encoding is None and
                    self._is_text_type(file_detail.mimetype))
        if compress:
            extra_args['ContentEncoding'] = 'gzip'
        elif (not file_detail.filename.endswith(".gz") and
                file_detail.encoding):
            # Don't apply gzip encoding to files that we receive as
            # already gzipped. The reason for this is storage will
            # serve this back to users as an uncompressed file if they
            # don't set an accept-encoding that includes gzip. This
            # can cause problems when the desired file state is
            # compressed as with .tar.gz tarballs.
            extra_args['ContentEncoding'] = file_detail.encoding

        # The context manager closes the file even when the upload raises
        # and retry_function calls us again.
        with open(file_detail.full_path, 'rb') as raw:
            data = GZIPCompressedStream(raw) if compress else raw
            self.bucket.upload_fileobj(
                data,
                relative_path,
                ExtraArgs=extra_args
            )
def cli_main():
    """Command line entry point: upload the given files and print the URL.

    Parses the bucket, the file list and the optional --endpoint/--prefix
    arguments, runs the upload and prints the resulting URL.  If any file
    failed to upload, each failure is written to stderr and the process
    exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Upload files s3"
    )
    parser.add_argument('--verbose', action='store_true',
                        help='show debug information')
    parser.add_argument('--endpoint',
                        help='http endpoint of s3 service')
    parser.add_argument('--prefix',
                        help='Prepend this path to the object names when '
                             'uploading')
    parser.add_argument('bucket',
                        help='Name of the bucket to use when uploading')
    parser.add_argument('files', nargs='+',
                        help='the file(s) to upload with recursive glob '
                             'matching when supplied as a string')

    args = parser.parse_args()
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
        logging.captureWarnings(True)

    # run() returns a (url, failures) pair; unpack it so that only the URL
    # is printed and failures can be reported separately.
    url, failures = run(args.bucket, args.files,
                        prefix=args.prefix,
                        endpoint=args.endpoint)
    print(url)

    if failures:
        for failure in failures:
            sys.stderr.write("Failed to upload %s: %s\n" % (
                failure.get('file'), failure.get('error')))
        sys.exit(1)
zuul_log_aws_access_key: minioadmin + zuul_log_aws_secret_key: minioadmin + zuul_log_bucket: zuul + test_content: "123abc" + _undocumented_test_worker_node_: "{{ inventory_hostname }}" + roles: + - ensure-docker + - ensure-pip + tasks: + - name: Install boto + pip: + name: + - boto3 + - botocore + + - name: Start minio server + command: >- + docker run -d -p 9000:9000 + -e MINIO_ACCESS_KEY={{ zuul_log_aws_access_key }} + -e MINIO_SECRET_KEY={{ zuul_log_aws_secret_key }} + minio/minio server /data + + - name: Make sure bucket exists + s3_bucket: + name: "{{ zuul_log_bucket }}" + state: present + s3_url: 'http://localhost:9000' + aws_access_key: "{{ zuul_log_aws_access_key }}" + aws_secret_key: "{{ zuul_log_aws_secret_key }}" + + - name: Create tempdir to upload to s3 + tempfile: + state: directory + register: fake_zuul_logdir + + - name: Add content to tempfile + copy: + content: "{{ test_content }}" + dest: "{{ fake_zuul_logdir.path }}/testfile" + + - name: Save zuul variables + set_fact: + old_zuul: "{{ zuul }}" + - name: Set simulated zuul variables + set_fact: + new_zuul: "{{ old_zuul | combine({'executor': {'log_root': fake_zuul_logdir.path}}, recursive=True) }}" + + - name: Upload file to s3 + include_role: + name: upload-logs-s3 + vars: + upload_logs_s3_endpoint: 'http://localhost:9000' + zuul: "{{ new_zuul }}" + + - name: Download mc + get_url: + url: https://dl.min.io/client/mc/release/linux-amd64/mc + dest: "{{ fake_zuul_logdir.path }}/mc" + mode: 0755 + + - name: Add localhost minio host + command: "{{ fake_zuul_logdir.path }}/mc config host add local http://localhost:9000 {{ zuul_log_aws_access_key }} {{ zuul_log_aws_secret_key }}" + + - name: List files in minio bucket + command: "{{ fake_zuul_logdir.path }}/mc find local/zuul" + + - name: Check for testfile in minio bucket + command: "{{ fake_zuul_logdir.path }}/mc find local/zuul/{{ zuul_log_path }}/testfile" diff --git a/test-requirements.txt b/test-requirements.txt index bdfdfc8f4..e40b50caa 100644 
--- a/test-requirements.txt +++ b/test-requirements.txt @@ -20,5 +20,8 @@ bs4 # For upload-logs-google google-cloud-storage +# For upload-logs-s3 +boto3 + # unittest.mock compatibility package for Python < 3.3 mock;python_version<'3.3' diff --git a/zuul-tests.d/logs-jobs.yaml b/zuul-tests.d/logs-jobs.yaml index 0160c8b78..4bf8e1903 100644 --- a/zuul-tests.d/logs-jobs.yaml +++ b/zuul-tests.d/logs-jobs.yaml @@ -3,8 +3,17 @@ description: Test the local-log-download role files: - roles/local-log-download/.* + - test-playbooks/local-log-download.yaml run: test-playbooks/local-log-download.yaml +- job: + name: zuul-jobs-test-upload-logs-s3 + description: Test the upload-logs-s3 role + files: + - roles/upload-logs-s3/.* + - test-playbooks/upload-logs-s3.yaml + run: test-playbooks/upload-logs-s3.yaml + # -* AUTOGENERATED *- # The following project section is autogenerated by # tox -e update-test-platforms @@ -14,5 +23,6 @@ check: jobs: &id001 - zuul-jobs-test-local-log-download + - zuul-jobs-test-upload-logs-s3 gate: jobs: *id001