zuul/zuul/lib/streamer_utils.py
James E. Blair 18fb324f1e Add auth token to websocket
When making a websocket request, browsers do not send the
"Authorization" header.  Therefore if a Zuul tenant is run in
a configuration where authz is required for read-only access,
the websocket-based log streaming will always fail.

To correct this, we will remove the http request authz check
from the console-stream endpoint, and add an optional token
parameter to the websocket message payload.  The JS web app
will be responsible for sending the auth token in the payload,
and the web server will validate it if it is required for the
tenant.  Thanks to Andrei Dmitriev for this suggestion.

Since we essentially have two different authz code paths in
zuul-web now, in order to share as much code as possible, the
authz sequence is refactored in such a way that the final authz
check can be deferred.  First we create an AuthContext at the
start of the request which stores tenant and header information,
then the actual validation is performed in a separate step where
the token can optionally be provided.

In the http code path, we create the AuthContext and validate
immediately, using the Authorization header, and we do all of that
in the cherrypy tool at the start of the request.

In the websocket code path, we create the AuthContext as the
websocket handler is being created by the cherrypy request handler,
then we perform validation after receiving a message on the
websocket.  We use the token supplied from the request.

Error handling is adjusted so in the http code path, exceptions
that return appropriate http errors are raised, but in the
websocket path, these are caught and translated into websocket
close calls.

A related issue is that we perform no validation that the
streaming build log being requested belongs to the tenant via
which the request is being sent.  This was unnecessary before
read-only access was an option, but now that it is, we should
check that a streaming build request arrives via the correct
tenant URL.  This change adjusts that as well.

During testing, it was noted that the tenant configuration syntax
allows admin-rules and access-rules to use the scalar-or-list
pattern, however some parts of the code assumed only lists.  The
configloader is updated to use scalar-or-list for both of those
values.

Change-Id: Ifd4c21bb1fe962bf23acb5b4f10b3bbaba61e63a
Co-Authored-By: Andrei Dmitriev <andrei.dmitriev@nokia.com>
2023-10-24 07:29:55 -07:00

244 lines
8.7 KiB
Python

# Copyright 2017 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
'''
This file contains code common to finger log streaming functionality.
The log streamer process within each executor, the finger gateway service,
and the web interface will all make use of this module.
'''
import logging
import os
import pwd
import random
import select
import socket
import socketserver
import ssl
import threading
import time
from zuul.exceptions import StreamingError
from zuul.zk.components import COMPONENT_REGISTRY
# Module-level logger used by the helper functions in this file.
log = logging.getLogger("zuul.lib.streamer_utils")
class BaseFingerRequestHandler(socketserver.BaseRequestHandler):
    '''
    Base class for common methods for handling finger requests.
    '''

    # Upper bound, in bytes, on an accepted request; also used as the
    # chunk size for each recv() call.
    MAX_REQUEST_LEN = 1024
    # Overall deadline, in seconds, for receiving a complete command.
    REQUEST_TIMEOUT = 10

    def getCommand(self):
        '''
        Read one newline-terminated command from the client socket.

        Bytes are accumulated from ``self.request`` until the buffer
        decodes as UTF-8 and contains a newline at an index greater than
        zero (a newline in the very first position does not terminate
        the request).

        :returns: The text preceding the first newline, with trailing
            whitespace (e.g. a carriage return) stripped.
        :raises BrokenPipeError: If recv() returns no data.
        :raises Exception: If REQUEST_TIMEOUT seconds pass without a
            complete command, if poll reports a non-input event, or if
            MAX_REQUEST_LEN bytes are buffered without a usable command.
        '''
        poll = select.poll()
        bitmask = (select.POLLIN | select.POLLERR |
                   select.POLLHUP | select.POLLNVAL)
        poll.register(self.request, bitmask)
        buffer = b''
        # Use a monotonic clock so that wall-clock adjustments cannot
        # stretch or shorten the deadline.
        start = time.monotonic()
        while True:
            elapsed = time.monotonic() - start
            timeout = max(self.REQUEST_TIMEOUT - elapsed, 0)
            if not timeout:
                raise Exception("Timeout while waiting for input")
            # poll() expects milliseconds, whereas the deadline is
            # tracked in seconds.
            for _fd, event in poll.poll(timeout * 1000):
                if event & select.POLLIN:
                    x = self.request.recv(self.MAX_REQUEST_LEN)
                    if not x:
                        # This will cause the caller to quietly shut down
                        raise BrokenPipeError
                    buffer += x
                else:
                    raise Exception("Received error event")
            if len(buffer) >= self.MAX_REQUEST_LEN:
                raise Exception("Request too long")
            try:
                ret = buffer.decode('utf-8')
                x = ret.find('\n')
                if x > 0:
                    # rstrip to remove any other unnecessary chars (e.g. \r)
                    return ret[:x].rstrip()
            except UnicodeDecodeError:
                # The buffer does not decode yet; keep reading until it
                # does or one of the limits above is hit.
                pass
class CustomThreadingTCPServer(socketserver.ThreadingTCPServer):
    '''
    Custom version that allows us to drop privileges after port binding.

    In addition to the base class arguments, the constructor accepts the
    optional keyword arguments ``user``, ``pid_file``, ``server_ssl_key``,
    ``server_ssl_cert`` and ``server_ssl_ca``; they are removed from
    ``kwargs`` before the base class constructor is called.
    '''

    def __init__(self, *args, **kwargs):
        # NOTE(pabelanger): Set up address_family for socketserver based on the
        # fingergw.listen_address setting in zuul.conf.
        # param tuple args[0]: The address and port to bind to for
        # socketserver.
        server_address = args[0]
        address_family = None
        for res in socket.getaddrinfo(
                server_address[0], server_address[1], 0, self.socket_type):
            if res[0] == socket.AF_INET6:
                # If we get an IPv6 address, break our loop and use that first.
                address_family = res[0]
                break
            elif res[0] == socket.AF_INET:
                address_family = res[0]

        # Check to see if getaddrinfo failed.
        if not address_family:
            raise Exception("getaddrinfo returns an empty list")

        # These must be set before the base constructor runs, because it
        # is called last and server_bind()/get_request() read them.
        self.address_family = address_family
        self.user = kwargs.pop('user', None)
        self.pid_file = kwargs.pop('pid_file', None)
        self.server_ssl_key = kwargs.pop('server_ssl_key', None)
        self.server_ssl_cert = kwargs.pop('server_ssl_cert', None)
        self.server_ssl_ca = kwargs.pop('server_ssl_ca', None)
        socketserver.ThreadingTCPServer.__init__(self, *args, **kwargs)

    def change_privs(self):
        '''
        Drop our privileges to another user.

        Does nothing unless the process is currently running as uid 0.
        '''
        if os.getuid() != 0:
            return

        pw = pwd.getpwnam(self.user)

        # Change owner on our pid file so it can be removed by us after
        # dropping privileges. May not exist if not a daemon.
        if self.pid_file and os.path.exists(self.pid_file):
            os.chown(self.pid_file, pw.pw_uid, pw.pw_gid)

        # Group changes must happen before setuid(), while we still have
        # the privileges to make them.
        os.setgroups([])
        os.setgid(pw.pw_gid)
        os.setuid(pw.pw_uid)
        os.umask(0o022)

    def server_bind(self):
        '''
        Overridden from the base class to allow address reuse and to drop
        privileges after binding to the listening socket.
        '''
        self.allow_reuse_address = True
        socketserver.ThreadingTCPServer.server_bind(self)
        if self.user:
            self.change_privs()

    def server_close(self):
        '''
        Overridden from base class to shutdown the socket immediately.

        The socket is closed even if shutdown() fails, so the file
        descriptor is never leaked.  An EBADF error (socket already
        closed) is ignored; any other socket error is re-raised.
        '''
        try:
            try:
                self.socket.shutdown(socket.SHUT_RD)
            finally:
                # Always release the descriptor, whatever shutdown() did.
                self.socket.close()
        except socket.error as e:
            # If it's already closed, don't error.
            if e.errno == socket.EBADF:
                return
            raise

    def process_request(self, request, client_address):
        '''
        Overridden from the base class to name the thread.
        '''
        t = threading.Thread(target=self.process_request_thread,
                             name='socketserver_Thread',
                             args=(request, client_address))
        t.daemon = self.daemon_threads
        t.start()

    def get_request(self):
        '''
        Accept a connection, wrapping it in TLS when SSL is configured.

        TLS is only used when the key, certificate and CA settings are
        all set; in that case the peer must present a certificate that
        verifies against the configured CA.

        :returns: A (socket, client address) tuple.
        '''
        sock, addr = super().get_request()

        if all([self.server_ssl_key, self.server_ssl_cert,
                self.server_ssl_ca]):
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            context.load_cert_chain(self.server_ssl_cert, self.server_ssl_key)
            context.load_verify_locations(self.server_ssl_ca)
            context.verify_mode = ssl.CERT_REQUIRED
            sock = context.wrap_socket(sock, server_side=True)
        return sock, addr
def getJobLogStreamAddress(executor_api, uuid, source_zone,
                           tenant_name=None):
    """
    Looks up the log stream address for the given build UUID.

    Try to find the build request for the given UUID in ZooKeeper
    by searching through all available zones. If a build request
    was found we use the worker information to build the log stream
    address.

    :param executor_api: Object whose ``getByUuid(uuid)`` returns a
        ``(build_request, zone)`` pair.
    :param uuid: UUID of the build to look up.
    :param source_zone: Zone the request originates from; compared with
        the worker's zone to decide whether to route via a finger gateway.
    :param tenant_name: If not None, the build request must belong to
        this tenant.
    :returns: A dict with ``server`` and ``port`` keys; ``use_ssl`` is
        added only when the stream is routed via a finger gateway.
    :raises StreamingError: If the build is not found, belongs to a
        different tenant, or has no worker information yet.
    """
    # Search for the build request in ZooKeeper. This iterates over all
    # available zones (including unzoned) and stops when the UUID is
    # found.
    # TODO (felix): Remove the zk_worker_zone return value after a deprecation
    # period. This is kept for backwards-compatibility until all executors
    # store their zone information in the worker_info dictionary.
    build_request, zk_worker_zone = executor_api.getByUuid(uuid)

    if build_request is None:
        raise StreamingError("Build not found")

    if tenant_name is not None and build_request.tenant_name != tenant_name:
        # Intentionally the same error as above to avoid leaking
        # out-of-tenant build information.
        raise StreamingError("Build not found")

    worker_info = build_request.worker_info
    if not worker_info:
        raise StreamingError("Build did not start yet")

    # Prefer the zone recorded in worker_info; fall back to the zone the
    # lookup returned.
    worker_zone = worker_info.get("zone", zk_worker_zone)
    job_log_stream_address = {}
    if worker_zone and source_zone != worker_zone:
        info = _getFingerGatewayInZone(worker_zone)
        if info:
            job_log_stream_address['server'] = info.hostname
            job_log_stream_address['port'] = info.public_port
            job_log_stream_address['use_ssl'] = info.use_ssl
            log.debug('Source (%s) and worker (%s) zone '
                      'are different, routing via %s:%s',
                      source_zone, worker_zone,
                      info.hostname, info.public_port)
        else:
            log.warning('Source (%s) and worker (%s) zone are different '
                        'but no fingergw in target zone found. '
                        'Falling back to direct connection.',
                        source_zone, worker_zone)
    else:
        log.debug('Source (%s) or worker zone (%s) undefined '
                  'or equal, no routing is needed.',
                  source_zone, worker_zone)

    # No gateway was selected above: connect to the worker directly.
    if 'server' not in job_log_stream_address:
        job_log_stream_address['server'] = worker_info["hostname"]
        job_log_stream_address['port'] = worker_info["log_port"]

    return job_log_stream_address
def _getFingerGatewayInZone(zone):
    """
    Pick a random registered 'fingergw' component whose zone equals *zone*.

    Returns None when the component registry has no such component.
    """
    candidates = []
    for gateway in COMPONENT_REGISTRY.registry.all('fingergw'):
        if gateway.zone == zone:
            candidates.append(gateway)
    return random.choice(candidates) if candidates else None