Skip to content

Commit

Permalink
Merged in DEV-11600-create-runner-for-pyconnect (pull request #4)
Browse files Browse the repository at this point in the history
DEV-11600 create runner for pyconnect

Approved-by: Felix Eggert <[email protected]>
  • Loading branch information
Swen Wenzel committed Sep 7, 2018
2 parents a118284 + 217d40d commit c2c7bd1
Show file tree
Hide file tree
Showing 27 changed files with 779 additions and 380 deletions.
17 changes: 7 additions & 10 deletions .drone.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
pipeline:
build:
image: python:${PYTHON_VERSION}-stretch
commands:
- python -m venv venv_test
- . venv_test/bin/activate
- pip install -r requirements.txt

full-tests:
image: python:${PYTHON_VERSION}-stretch
commands:
- apt-get update
- apt-get install -yq --no-install-recommends openjdk-8-jre
- . venv_test/bin/activate
- pytest --run-e2e --doctest-modules
- apt-get install -yq --no-install-recommends openjdk-8-jre virtualenv
- virtualenv --python=python3 .venv
- ./.venv/bin/pip install -U pip
- ./.venv/bin/pip install -r test-requirements.txt -r flake8-requirements.txt -e .
- (until (curl -s "http://rest-proxy:8082/topics" >/dev/null); do sleep 0.1s; done)
- ./.venv/bin/pytest --run-e2e --doctest-modules
- ./.venv/bin/flake8

slack:
image: plugins/slack
Expand Down
3 changes: 2 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ ignore = E999
exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,*.pkl,*.json,*.csv,.venv,pre-commit
max-line-length=119
max-complexity=10
import-order-style=edited
import-order-style=pycharm
application-import-names=pyconnect,test
17 changes: 10 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,23 @@ VERSION := 0.0.2
GROUP := None
SHELL = /bin/bash

install-hooks:
pip3 install --user -r flake8-requirements.txt && \
pip3 install --user gitpython==2.1.10 && \
install-hooks: install-virtualenv
. .venv/bin/activate && \
pip3 install -r flake8-requirements.txt && \
pip3 install gitpython==2.1.10 && \
ln -sf ../../commithooks/pre-commit .git/hooks/pre-commit && \
ln -sf ../../commithooks/prepare-commit-msg .git/hooks/prepare-commit-msg && \
chmod +x .git/hooks/pre-commit && \
chmod +x .git/hooks/prepare-commit-msg && \
git config --bool flake8.strict true

install-system-packages:
sudo apt-get install docker docker-compose kafkacat python-virtualenv python3.7 -y
sudo apt-get install docker docker-compose kafkacat virtualenv python3.6 -y

install-virtualenv:
[[ -d .venv ]] || virtualenv --python=3.7 ./.venv
[[ -d .venv ]] || virtualenv --python=3.6 ./.venv
./.venv/bin/python -m pip install -r requirements.txt
./.venv/bin/python -m pip install -e .

install-hosts:
[[ -n "`cat /etc/hosts | grep __start_pyconnect__`" ]] || \
Expand Down Expand Up @@ -67,8 +69,9 @@ check-offsets: boot-cluster
test/kafka/bin/kafka-consumer-groups.sh --bootstrap-server broker:9092 --describe --group $(GROUP) --members --verbose

publish-test:
python setup.py sdist
twine upload dist/* -r testpypi
rm -rf dist
python setup.py sdist bdist_wheel
twine upload dist/* --repository-url https://test.pypi.org/legacy/

publish: publish-test
twine upload dist/*
2 changes: 1 addition & 1 deletion commithooks/pre-commit
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

# flake 8 invocation
python3 commithooks/pre-commit-flake.py || exit
./.venv/bin/python commithooks/pre-commit-flake.py || exit
6 changes: 5 additions & 1 deletion commithooks/prepare-commit-msg
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ if ! [[ $( echo "$name" | egrep -i "^dev-[0-9]+") ]]; then
exit 0
fi
nameshort=$( echo "$name" | egrep -io "dev-[0-9]+")
echo "$nameshort": "$(cat "$1")" > "$1"

# make sure $nameshort not already in commit message
if [[ -z $(grep -F "$nameshort" $1) ]]; then
echo "$nameshort": "$(cat "$1")" > "$1"
fi
16 changes: 16 additions & 0 deletions examples/file_sink/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FROM python:3.6

RUN ["pip", "install", "pyconnect"]

COPY . /app/

WORKDIR /app

# ENV must use the KEY=value form (no spaces around '='); with the previous
# `ENV KEY = 'value'` space form Docker sets KEY to the literal string
# "= 'value'", so none of these variables would have been read correctly.
ENV PYCONNECT_BOOTSTRAP_SERVERS='broker:9092'
ENV PYCONNECT_SCHEMA_REGISTRY='schema-registry:8082'
ENV PYCONNECT_TOPICS='testtopic-sink'
ENV PYCONNECT_SINK_DIRECTORY='/tmp/filesink/'
# Renamed from PYCONNECT_FILENAME: every other variable follows the
# PYCONNECT_<config_key> pattern and the config class pops 'sink_filename'.
# NOTE(review): confirm the exact env-name mapping in Config.from_env_variables.
ENV PYCONNECT_SINK_FILENAME='sinkfile.json'
ENV PYCONNECT_GROUP_ID='file_sink_test'

ENTRYPOINT ["python", "file_sink.py", "--config", "env"]
121 changes: 121 additions & 0 deletions examples/file_sink/file_sink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import json
import logging
import pathlib
from typing import List, cast

from confluent_kafka.cimpl import Message

from pyconnect import PyConnectSink, SinkConfig
from pyconnect.core import Status, message_repr

logger = logging.getLogger(__name__)


class FileSinkConfig(SinkConfig):
    """
    In addition to the fields from :class:`pyconnect.config.SinkConfig` this class provides the following fields:
    **sink_directory**: :class:`pathlib.Path`
        The directory where this sink shall put the file it writes all messages to.
    **sink_filename**: str
        The name of the file that this sink writes all messages to.
    """
    # NOTE(review): name-mangled to _FileSinkConfig__parsers; presumably the
    # base config class discovers these parsers to convert raw string values
    # (here: str -> absolute Path) — confirm against pyconnect.config.
    __parsers = {'sink_directory': lambda p: pathlib.Path(p).absolute()}

    def __init__(self, conf_dict):
        # Work on a copy so the caller's dict is not mutated by the pops below.
        conf_dict = conf_dict.copy()
        # Pop the sink-specific keys (raises KeyError if either is missing) so
        # the base class only receives the remaining, generic options.
        self['sink_directory'] = conf_dict.pop('sink_directory')
        self['sink_filename'] = conf_dict.pop('sink_filename')
        super().__init__(conf_dict)
        logger.debug(f'Configuration: {self!r}')


class FileSink(PyConnectSink):
    """
    Sink connector that appends every consumed message to a single file.

    Messages are buffered in memory in arrival order and only persisted
    when the framework triggers a flush; the sink stops once EOF has been
    observed on all assigned partitions.
    """

    def __init__(self, config: FileSinkConfig):
        super().__init__(config)
        # Messages received since the last flush.
        self._pending: List[Message] = []

    def on_message_received(self, msg: Message) -> None:
        # Buffer only — actual disk I/O is deferred to on_flush().
        logger.debug(f'Message Received: {message_repr(msg)}')
        self._pending.append(msg)

    def on_startup(self):
        # Make sure the target directory exists before the first flush.
        logger.debug(f'Creating parent directory: {self.config["sink_directory"]}')
        directory = cast(pathlib.Path, self.config['sink_directory'])
        directory.mkdir(parents=True, exist_ok=True)

    def on_flush(self) -> None:
        # Serialize each buffered message as one JSON line.
        serialized = []
        for msg in self._pending:
            serialized.append(json.dumps({'key': msg.key(), 'value': msg.value()}) + '\n')

        target = self.config['sink_directory'] / self.config['sink_filename']
        logger.info(f'Writing {len(serialized)} line(s) to {target}')
        with open(target, 'a') as outfile:
            outfile.writelines(serialized)

        logger.debug('The following lines were written:')
        for entry in serialized:
            logger.debug(f'> {entry!r}')

        self._pending.clear()

    def on_no_message_received(self):
        # TODO the following should probably be two attributes like 'has_subscriptions' and 'all_partitions_at_eof'
        # Stop once at least one partition is tracked and all of them hit EOF.
        if self.eof_reached and all(self.eof_reached.values()):
            logger.info('EOF reached, stopping.')
            return Status.STOPPED
        return None


def main():
    """
    CLI entry point: parse arguments, optionally configure logging, load a
    :class:`FileSinkConfig` from the requested location and run the sink.
    """
    # TODO move to pyconnect.core.main(connector_cls, config_cls)
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', choices=['env', 'yaml', 'json'], default='env', help='Defines where the config '
                        'is loaded from')
    # Fixed help text: the original implicit string concatenation was missing
    # a space and rendered as "...config is loadedfrom this file...".
    parser.add_argument('--conf_file', default=None, help='When `conf` is yaml or json, then config is loaded '
                        'from this file, default will be `./config.(yaml|json)` '
                        'depending on which kind of file you chose')
    parser.add_argument('--loglevel', choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        help='Set log level to given value, if "NOTSET" (default) no logging is active.',
                        default='NOTSET')

    args = parser.parse_args()

    if args.loglevel != 'NOTSET':
        _configure_logging(args.loglevel)

    # argparse's `choices` guarantees args.config is one of these three
    # values, so no error fallback is needed (the previous one was
    # unreachable). Note: the bogus `config: FileSinkConfig = None`
    # pre-initialization was dropped for the same reason.
    if args.config == 'env':
        config = FileSinkConfig.from_env_variables()
    elif args.config == 'yaml':
        config = FileSinkConfig.from_yaml_file(args.conf_file or ('./config.' + args.config))
    else:  # 'json'
        config = FileSinkConfig.from_json_file(args.conf_file or ('./config.' + args.config))

    sink = FileSink(config)
    sink.run()


def _configure_logging(level_name: str) -> None:
    """Attach a stream handler at the given level to the root logger."""
    base_logger = logging.getLogger()
    loglevel = getattr(logging, level_name)

    formatter = logging.Formatter('%(levelname)-8s - %(name)-12s - %(message)s')

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(loglevel)
    stream_handler.setFormatter(formatter)

    base_logger.setLevel(loglevel)
    base_logger.addHandler(stream_handler)


if __name__ == '__main__':
    main()
16 changes: 16 additions & 0 deletions examples/file_source/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FROM python:3.6

# Exec form, matching the file_sink Dockerfile. The previous shell form
# `RUN 'pip install pyconnect'` tried to execute a single file literally
# named "pip install pyconnect" and would fail the build.
RUN ["pip", "install", "pyconnect"]

COPY . /app/

WORKDIR /app

# ENV must use the KEY=value form (no spaces around '='); the previous
# `ENV KEY = 'value'` form sets KEY to the literal string "= 'value'".
# Also dropped the stray trailing comma that would have become part of
# PYCONNECT_SOURCE_DIRECTORY's value.
ENV PYCONNECT_BOOTSTRAP_SERVERS='broker:9092'
ENV PYCONNECT_SCHEMA_REGISTRY='schema-registry:8082'
ENV PYCONNECT_TOPIC='testtopic-source'
ENV PYCONNECT_OFFSET_TOPIC='testtopic-source-offset'
ENV PYCONNECT_SOURCE_DIRECTORY='/tmp/filesource/'
ENV PYCONNECT_SOURCE_FILENAME='sourcefile.json'

# Exec form requires double quotes (the array must be valid JSON); with
# single quotes Docker silently falls back to shell form and runs the
# literal string "['python', ..." as a command.
ENTRYPOINT ["python", "file_source.py", "--config", "env"]
119 changes: 119 additions & 0 deletions examples/file_source/file_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import json
import logging
import pathlib
from typing import Any, Optional, TextIO, Tuple

from pyconnect import PyConnectSource, SourceConfig
from pyconnect.core import Status

logger = logging.getLogger(__name__)


class FileSourceConfig(SourceConfig):
    """
    In addition to the fields from :class:`pyconnect.config.SourceConfig` this class provides the following fields:
    **source_directory**: :class:`pathlib.Path`
        The directory where this source looks for the file it reads all messages from.
    **source_filename**: str
        The name of the file that this source reads messages from.
        The file should contain lines of json objects like `{'key': Any, 'value': Any}`
    """

    # NOTE(review): name-mangled to _FileSourceConfig__parsers; presumably the
    # base config class discovers these parsers to convert raw string values
    # (here: str -> absolute Path) — confirm against pyconnect.config.
    __parsers = {'source_directory': lambda p: pathlib.Path(p).absolute()}

    def __init__(self, conf_dict):
        # Work on a copy so the caller's dict is not mutated by the pops below.
        conf_dict = conf_dict.copy()
        # Pop the source-specific keys (raises KeyError if either is missing)
        # so the base class only receives the remaining, generic options.
        self['source_directory'] = conf_dict.pop('source_directory')
        self['source_filename'] = conf_dict.pop('source_filename')
        super().__init__(conf_dict)
        logger.debug(f'Configuration: {self!r}')


class FileSource(PyConnectSource):
    """
    A source that reads and publishes json objects from a file.

    Each line of the source file must be a JSON object of the form
    ``{"key": ..., "value": ...}``; the raw file offset doubles as the
    source's committed position (see :meth:`get_index` / :meth:`seek`).
    """
    def __init__(self, config: FileSourceConfig):
        super().__init__(config)
        # Handle to the source file; opened lazily in on_startup().
        self._file: Optional[TextIO] = None

    def on_startup(self):
        # Resolve the file from configuration and keep it open for the
        # lifetime of the connector.
        source_path = self.config['source_directory'] / self.config['source_filename']
        logger.info(f'Opening file "{source_path}" for reading.')
        self._file = open(source_path, 'r')

    def seek(self, index: int) -> None:
        """Reposition the file cursor to a previously committed offset."""
        logger.info(f'Seeking to position: {index!r}')
        self._file.seek(index)

    def read(self) -> Tuple[Any, Any]:
        """
        Read the next line and return its ``(key, value)`` pair.

        Raises StopIteration at end of file — presumably translated by the
        base class into an :meth:`on_eof` call (TODO confirm against
        PyConnectSource).
        """
        line = next(self._file)
        logger.debug(f'Read line: {line!r}')
        record = json.loads(line)
        return record['key'], record['value']

    def on_eof(self) -> Status:
        logger.info('EOF reached, stopping.')
        return Status.STOPPED

    def get_index(self) -> int:
        """Return the current file offset, used as the committed position."""
        index = self._file.tell()
        logger.debug(f'File object is at position: {index!r}')
        return index

    def close(self):
        try:
            super().close()
        finally:
            # Guard against close() running before on_startup() ever opened
            # the file (e.g. a startup failure): previously this raised
            # AttributeError on None, masking the original error.
            if self._file is not None:
                logger.info('Closing file object.')
                self._file.close()


def main():
    """
    CLI entry point: parse arguments, optionally configure logging, load a
    :class:`FileSourceConfig` from the requested location and run the source.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', choices=['env', 'yaml', 'json'], default='env', help='Defines where the config '
                        'is loaded from')
    # Fixed help text: the original implicit string concatenation was missing
    # a space and rendered as "...config is loadedfrom this file...".
    parser.add_argument('--conf_file', default=None, help='When `conf` is yaml or json, then config is loaded '
                        'from this file, default will be `./config.(yaml|json)` '
                        'depending on which kind of file you chose')
    parser.add_argument('--loglevel', choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
                        help='Set log level to given value, if "NOTSET" (default) no logging is active.',
                        default='NOTSET')

    args = parser.parse_args()

    if args.loglevel != 'NOTSET':
        _configure_logging(args.loglevel)

    # argparse's `choices` guarantees args.config is one of these three
    # values, so no error fallback is needed (the previous one was
    # unreachable). Note: the bogus `config: FileSourceConfig = None`
    # pre-initialization was dropped for the same reason.
    if args.config == 'env':
        config = FileSourceConfig.from_env_variables()
    elif args.config == 'yaml':
        config = FileSourceConfig.from_yaml_file(args.conf_file or ('./config.' + args.config))
    else:  # 'json'
        config = FileSourceConfig.from_json_file(args.conf_file or ('./config.' + args.config))

    source = FileSource(config)
    source.run()


def _configure_logging(level_name: str) -> None:
    """Attach a stream handler at the given level to the root logger."""
    base_logger = logging.getLogger()
    loglevel = getattr(logging, level_name)

    formatter = logging.Formatter('%(levelname)-8s - %(name)-12s - %(message)s')

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(loglevel)
    stream_handler.setFormatter(formatter)

    base_logger.setLevel(loglevel)
    base_logger.addHandler(stream_handler)


if __name__ == '__main__':
    main()
10 changes: 0 additions & 10 deletions pyconnect/__init__.py

This file was deleted.

5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

setup(
name='pyconnect',
version='0.0.2',
version='0.0.5',
packages=['pyconnect'],
package_dir={'': 'src'},

# minimal requirements to run pyconnect
install_requires=[
"confluent-kafka>=0.11.5",
"confluent-kafka[avro]>=0.11.5",
"pyaml>=3.13"
],
url='https://github.com/MrTrustworthy/pyconnect',
Expand Down
Loading

0 comments on commit c2c7bd1

Please sign in to comment.