334 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			334 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import absolute_import
 | 
						|
 | 
						|
import atexit
 | 
						|
import logging
 | 
						|
import os
 | 
						|
import os.path
 | 
						|
import shutil
 | 
						|
import subprocess
 | 
						|
import tempfile
 | 
						|
import time
 | 
						|
import uuid
 | 
						|
 | 
						|
from six.moves import urllib
 | 
						|
from six.moves.urllib.parse import urlparse  # pylint: disable=E0611,F0401
 | 
						|
 | 
						|
from test.service import ExternalService, SpawnedService
 | 
						|
from test.testutil import get_open_port
 | 
						|
 | 
						|
 | 
						|
log = logging.getLogger(__name__)
 | 
						|
 | 
						|
 | 
						|
class Fixture(object):
 | 
						|
    kafka_version = os.environ.get('KAFKA_VERSION', '0.8.0')
 | 
						|
    scala_version = os.environ.get("SCALA_VERSION", '2.8.0')
 | 
						|
    project_root = os.environ.get('PROJECT_ROOT', os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 | 
						|
    kafka_root = os.environ.get("KAFKA_ROOT", os.path.join(project_root, 'servers', kafka_version, "kafka-bin"))
 | 
						|
    ivy_root = os.environ.get('IVY_ROOT', os.path.expanduser("~/.ivy2/cache"))
 | 
						|
 | 
						|
    @classmethod
 | 
						|
    def download_official_distribution(cls,
 | 
						|
                                       kafka_version=None,
 | 
						|
                                       scala_version=None,
 | 
						|
                                       output_dir=None):
 | 
						|
        if not kafka_version:
 | 
						|
            kafka_version = cls.kafka_version
 | 
						|
        if not scala_version:
 | 
						|
            scala_version = cls.scala_version
 | 
						|
        if not output_dir:
 | 
						|
            output_dir = os.path.join(cls.project_root, 'servers', 'dist')
 | 
						|
 | 
						|
        distfile = 'kafka_%s-%s' % (scala_version, kafka_version,)
 | 
						|
        url_base = 'https://archive.apache.org/dist/kafka/%s/' % (kafka_version,)
 | 
						|
        output_file = os.path.join(output_dir, distfile + '.tgz')
 | 
						|
 | 
						|
        if os.path.isfile(output_file):
 | 
						|
            log.info("Found file already on disk: %s", output_file)
 | 
						|
            return output_file
 | 
						|
 | 
						|
        # New tarballs are .tgz, older ones are sometimes .tar.gz
 | 
						|
        try:
 | 
						|
            url = url_base + distfile + '.tgz'
 | 
						|
            log.info("Attempting to download %s", url)
 | 
						|
            response = urllib.request.urlopen(url)
 | 
						|
        except urllib.error.HTTPError:
 | 
						|
            log.exception("HTTP Error")
 | 
						|
            url = url_base + distfile + '.tar.gz'
 | 
						|
            log.info("Attempting to download %s", url)
 | 
						|
            response = urllib.request.urlopen(url)
 | 
						|
 | 
						|
        log.info("Saving distribution file to %s", output_file)
 | 
						|
        with open(output_file, 'w') as output_file_fd:
 | 
						|
            output_file_fd.write(response.read())
 | 
						|
 | 
						|
        return output_file
 | 
						|
 | 
						|
    @classmethod
 | 
						|
    def test_resource(cls, filename):
 | 
						|
        return os.path.join(cls.project_root, "servers", cls.kafka_version, "resources", filename)
 | 
						|
 | 
						|
    @classmethod
 | 
						|
    def kafka_run_class_args(cls, *args):
 | 
						|
        result = [os.path.join(cls.kafka_root, 'bin', 'kafka-run-class.sh')]
 | 
						|
        result.extend(args)
 | 
						|
        return result
 | 
						|
 | 
						|
    def kafka_run_class_env(self):
 | 
						|
        env = os.environ.copy()
 | 
						|
        env['KAFKA_LOG4J_OPTS'] = "-Dlog4j.configuration=file:%s" % self.test_resource("log4j.properties")
 | 
						|
        return env
 | 
						|
 | 
						|
    @classmethod
 | 
						|
    def render_template(cls, source_file, target_file, binding):
 | 
						|
        log.info('Rendering %s from template %s', target_file, source_file)
 | 
						|
        with open(source_file, "r") as handle:
 | 
						|
            template = handle.read()
 | 
						|
            assert len(template) > 0, 'Empty template %s' % source_file
 | 
						|
        with open(target_file, "w") as handle:
 | 
						|
            handle.write(template.format(**binding))
 | 
						|
            handle.flush()
 | 
						|
            os.fsync(handle)
 | 
						|
 | 
						|
        # fsync directory for durability
 | 
						|
        # https://blog.gocept.com/2013/07/15/reliable-file-updates-with-python/
 | 
						|
        dirfd = os.open(os.path.dirname(target_file), os.O_DIRECTORY)
 | 
						|
        os.fsync(dirfd)
 | 
						|
        os.close(dirfd)
 | 
						|
 | 
						|
 | 
						|
class ZookeeperFixture(Fixture):
 | 
						|
    @classmethod
 | 
						|
    def instance(cls):
 | 
						|
        if "ZOOKEEPER_URI" in os.environ:
 | 
						|
            parse = urlparse(os.environ["ZOOKEEPER_URI"])
 | 
						|
            (host, port) = (parse.hostname, parse.port)
 | 
						|
            fixture = ExternalService(host, port)
 | 
						|
        else:
 | 
						|
            (host, port) = ("127.0.0.1", None)
 | 
						|
            fixture = cls(host, port)
 | 
						|
 | 
						|
        fixture.open()
 | 
						|
        return fixture
 | 
						|
 | 
						|
    def __init__(self, host, port):
 | 
						|
        self.host = host
 | 
						|
        self.port = port
 | 
						|
 | 
						|
        self.tmp_dir = None
 | 
						|
        self.child = None
 | 
						|
 | 
						|
    def kafka_run_class_env(self):
 | 
						|
        env = super(ZookeeperFixture, self).kafka_run_class_env()
 | 
						|
        env['LOG_DIR'] = os.path.join(self.tmp_dir, 'logs')
 | 
						|
        return env
 | 
						|
 | 
						|
    def out(self, message):
 | 
						|
        log.info("*** Zookeeper [%s:%s]: %s", self.host, self.port or '(auto)', message)
 | 
						|
 | 
						|
    def open(self):
 | 
						|
        self.tmp_dir = tempfile.mkdtemp()
 | 
						|
        self.out("Running local instance...")
 | 
						|
        log.info("  host    = %s", self.host)
 | 
						|
        log.info("  port    = %s", self.port or '(auto)')
 | 
						|
        log.info("  tmp_dir = %s", self.tmp_dir)
 | 
						|
 | 
						|
        # Configure Zookeeper child process
 | 
						|
        template = self.test_resource("zookeeper.properties")
 | 
						|
        properties = os.path.join(self.tmp_dir, "zookeeper.properties")
 | 
						|
        args = self.kafka_run_class_args("org.apache.zookeeper.server.quorum.QuorumPeerMain", properties)
 | 
						|
        env = self.kafka_run_class_env()
 | 
						|
 | 
						|
        # Party!
 | 
						|
        timeout = 5
 | 
						|
        max_timeout = 30
 | 
						|
        backoff = 1
 | 
						|
        end_at = time.time() + max_timeout
 | 
						|
        tries = 1
 | 
						|
        auto_port = (self.port is None)
 | 
						|
        while time.time() < end_at:
 | 
						|
            if auto_port:
 | 
						|
                self.port = get_open_port()
 | 
						|
            self.out('Attempting to start on port %d (try #%d)' % (self.port, tries))
 | 
						|
            self.render_template(template, properties, vars(self))
 | 
						|
            self.child = SpawnedService(args, env)
 | 
						|
            self.child.start()
 | 
						|
            timeout = min(timeout, max(end_at - time.time(), 0))
 | 
						|
            if self.child.wait_for(r"binding to port", timeout=timeout):
 | 
						|
                break
 | 
						|
            self.child.dump_logs()
 | 
						|
            self.child.stop()
 | 
						|
            timeout *= 2
 | 
						|
            time.sleep(backoff)
 | 
						|
            tries += 1
 | 
						|
        else:
 | 
						|
            raise Exception('Failed to start Zookeeper before max_timeout')
 | 
						|
        self.out("Done!")
 | 
						|
        atexit.register(self.close)
 | 
						|
 | 
						|
    def close(self):
 | 
						|
        if self.child is None:
 | 
						|
            return
 | 
						|
        self.out("Stopping...")
 | 
						|
        self.child.stop()
 | 
						|
        self.child = None
 | 
						|
        self.out("Done!")
 | 
						|
        shutil.rmtree(self.tmp_dir)
 | 
						|
 | 
						|
    def __del__(self):
 | 
						|
        self.close()
 | 
						|
 | 
						|
 | 
						|
class KafkaFixture(Fixture):
 | 
						|
    @classmethod
 | 
						|
    def instance(cls, broker_id, zk_host, zk_port, zk_chroot=None,
 | 
						|
                 host=None, port=None,
 | 
						|
                 transport='PLAINTEXT', replicas=1, partitions=2):
 | 
						|
        if zk_chroot is None:
 | 
						|
            zk_chroot = "kafka-python_" + str(uuid.uuid4()).replace("-", "_")
 | 
						|
        if "KAFKA_URI" in os.environ:
 | 
						|
            parse = urlparse(os.environ["KAFKA_URI"])
 | 
						|
            (host, port) = (parse.hostname, parse.port)
 | 
						|
            fixture = ExternalService(host, port)
 | 
						|
        else:
 | 
						|
            # force IPv6 here because of a confusing point:
 | 
						|
            #
 | 
						|
            #  - if the string "localhost" is passed, Kafka will *only* bind to the IPv4 address of localhost
 | 
						|
            #    (127.0.0.1); however, kafka-python will attempt to connect on ::1 and fail
 | 
						|
            #
 | 
						|
            #  - if the address literal 127.0.0.1 is passed, the metadata request during bootstrap will return
 | 
						|
            #    the name "localhost" and we'll go back to the first case. This is odd!
 | 
						|
            #
 | 
						|
            # Ideally, Kafka would bind to all loopback addresses when we tell it to listen on "localhost" the
 | 
						|
            # way it makes an IPv6 socket bound to both 0.0.0.0/0 and ::/0 when we tell it to bind to "" (that is
 | 
						|
            # to say, when we make a listener of PLAINTEXT://:port.
 | 
						|
            #
 | 
						|
            # Note that even though we specify the bind host in bracket notation, Kafka responds to the bootstrap
 | 
						|
            # metadata request without square brackets later.
 | 
						|
            if host is None:
 | 
						|
                host = "[::1]"
 | 
						|
            fixture = KafkaFixture(host, port, broker_id,
 | 
						|
                                   zk_host, zk_port, zk_chroot,
 | 
						|
                                   transport=transport,
 | 
						|
                                   replicas=replicas, partitions=partitions)
 | 
						|
            fixture.open()
 | 
						|
        return fixture
 | 
						|
 | 
						|
    def __init__(self, host, port, broker_id, zk_host, zk_port, zk_chroot,
 | 
						|
                 replicas=1, partitions=2, transport='PLAINTEXT'):
 | 
						|
        self.host = host
 | 
						|
        self.port = port
 | 
						|
 | 
						|
        self.broker_id = broker_id
 | 
						|
        self.transport = transport.upper()
 | 
						|
        self.ssl_dir = self.test_resource('ssl')
 | 
						|
 | 
						|
        self.zk_host = zk_host
 | 
						|
        self.zk_port = zk_port
 | 
						|
        self.zk_chroot = zk_chroot
 | 
						|
 | 
						|
        self.replicas = replicas
 | 
						|
        self.partitions = partitions
 | 
						|
 | 
						|
        self.tmp_dir = None
 | 
						|
        self.child = None
 | 
						|
        self.running = False
 | 
						|
 | 
						|
    def kafka_run_class_env(self):
 | 
						|
        env = super(KafkaFixture, self).kafka_run_class_env()
 | 
						|
        env['LOG_DIR'] = os.path.join(self.tmp_dir, 'logs')
 | 
						|
        return env
 | 
						|
 | 
						|
    def out(self, message):
 | 
						|
        log.info("*** Kafka [%s:%s]: %s", self.host, self.port or '(auto)', message)
 | 
						|
 | 
						|
    def open(self):
 | 
						|
        if self.running:
 | 
						|
            self.out("Instance already running")
 | 
						|
            return
 | 
						|
 | 
						|
        self.tmp_dir = tempfile.mkdtemp()
 | 
						|
        self.out("Running local instance...")
 | 
						|
        log.info("  host       = %s", self.host)
 | 
						|
        log.info("  port       = %s", self.port or '(auto)')
 | 
						|
        log.info("  transport  = %s", self.transport)
 | 
						|
        log.info("  broker_id  = %s", self.broker_id)
 | 
						|
        log.info("  zk_host    = %s", self.zk_host)
 | 
						|
        log.info("  zk_port    = %s", self.zk_port)
 | 
						|
        log.info("  zk_chroot  = %s", self.zk_chroot)
 | 
						|
        log.info("  replicas   = %s", self.replicas)
 | 
						|
        log.info("  partitions = %s", self.partitions)
 | 
						|
        log.info("  tmp_dir    = %s", self.tmp_dir)
 | 
						|
 | 
						|
        # Create directories
 | 
						|
        os.mkdir(os.path.join(self.tmp_dir, "logs"))
 | 
						|
        os.mkdir(os.path.join(self.tmp_dir, "data"))
 | 
						|
 | 
						|
        self.out("Creating Zookeeper chroot node...")
 | 
						|
        args = self.kafka_run_class_args("org.apache.zookeeper.ZooKeeperMain",
 | 
						|
                                         "-server", "%s:%d" % (self.zk_host, self.zk_port),
 | 
						|
                                         "create",
 | 
						|
                                         "/%s" % self.zk_chroot,
 | 
						|
                                         "kafka-python")
 | 
						|
        env = self.kafka_run_class_env()
 | 
						|
        proc = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 | 
						|
 | 
						|
        if proc.wait() != 0:
 | 
						|
            self.out("Failed to create Zookeeper chroot node")
 | 
						|
            self.out(proc.stdout.read())
 | 
						|
            self.out(proc.stderr.read())
 | 
						|
            raise RuntimeError("Failed to create Zookeeper chroot node")
 | 
						|
        self.out("Done!")
 | 
						|
 | 
						|
        # Configure Kafka child process
 | 
						|
        properties = os.path.join(self.tmp_dir, "kafka.properties")
 | 
						|
        template = self.test_resource("kafka.properties")
 | 
						|
        args = self.kafka_run_class_args("kafka.Kafka", properties)
 | 
						|
        env = self.kafka_run_class_env()
 | 
						|
 | 
						|
        timeout = 5
 | 
						|
        max_timeout = 30
 | 
						|
        backoff = 1
 | 
						|
        end_at = time.time() + max_timeout
 | 
						|
        tries = 1
 | 
						|
        auto_port = (self.port is None)
 | 
						|
        while time.time() < end_at:
 | 
						|
            # We have had problems with port conflicts on travis
 | 
						|
            # so we will try a different port on each retry
 | 
						|
            # unless the fixture was passed a specific port
 | 
						|
            if auto_port:
 | 
						|
                self.port = get_open_port()
 | 
						|
            self.out('Attempting to start on port %d (try #%d)' % (self.port, tries))
 | 
						|
            self.render_template(template, properties, vars(self))
 | 
						|
            self.child = SpawnedService(args, env)
 | 
						|
            self.child.start()
 | 
						|
            timeout = min(timeout, max(end_at - time.time(), 0))
 | 
						|
            if self.child.wait_for(r"\[Kafka Server %d\], Started" %
 | 
						|
                                   self.broker_id, timeout=timeout):
 | 
						|
                break
 | 
						|
            self.child.dump_logs()
 | 
						|
            self.child.stop()
 | 
						|
            timeout *= 2
 | 
						|
            time.sleep(backoff)
 | 
						|
            tries += 1
 | 
						|
        else:
 | 
						|
            raise Exception('Failed to start KafkaInstance before max_timeout')
 | 
						|
        self.out("Done!")
 | 
						|
        self.running = True
 | 
						|
        atexit.register(self.close)
 | 
						|
 | 
						|
    def __del__(self):
 | 
						|
        self.close()
 | 
						|
 | 
						|
    def close(self):
 | 
						|
        if not self.running:
 | 
						|
            self.out("Instance already stopped")
 | 
						|
            return
 | 
						|
 | 
						|
        self.out("Stopping...")
 | 
						|
        self.child.stop()
 | 
						|
        self.child = None
 | 
						|
        self.out("Done!")
 | 
						|
        shutil.rmtree(self.tmp_dir)
 | 
						|
        self.running = False
 |