Skip to content

Commit

Permalink
first version
Browse files Browse the repository at this point in the history
  • Loading branch information
mmaelicke committed Oct 28, 2022
1 parent 0c87e9b commit cf57d8e
Show file tree
Hide file tree
Showing 7 changed files with 213 additions and 60 deletions.
7 changes: 3 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
FROM python:3.10

# install the toolbox runner tools
RUN pip install toolbox-runner
# pinned for reproducible builds; --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir toolbox-runner==0.5.0


# Do anything you need to install tool dependencies here
RUN echo "Replace this line with a tool"

# install pandas and pandas-profiling (pinned versions).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir pandas==1.5.1 pandas-profiling==3.4.0
# create the tool input structure
RUN mkdir /in
COPY ./in /in
Expand Down
151 changes: 151 additions & 0 deletions in/dataframe.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
sepallength,sepalwidth,petallength,petalwidth,class
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
11 changes: 0 additions & 11 deletions in/foo_csv.csv

This file was deleted.

10 changes: 0 additions & 10 deletions in/foo_matrix.mat

This file was deleted.

10 changes: 2 additions & 8 deletions in/tool.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
{
"foobar": {
"foo_int": 42,
"foo_float": 13.37,
"foo_string": "Never eat yellow snow",
"foo_enum": "bar",
"foo_array": [34, 55, 23, 43, 23],
"foo_matrix": "/in/foo_matrix.mat",
"foo_csv": "/in/foo_csv.csv"
"profile": {
"data": "./dataframe.csv"
}
}
48 changes: 43 additions & 5 deletions src/run.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,57 @@
import os
from datetime import datetime as dt

from pandas_profiling import ProfileReport
import numpy as np
import pandas as pd

from toolbox_runner.parameter import parse_parameter

# parse parameters
kwargs = parse_parameter()

# check if a toolname was set in env
toolname = os.environ.get('TOOL_RUN', 'foobar').lower()
toolname = os.environ.get('TOOL_RUN', 'profile').lower()


def load_data(df_or_path):
    """Return the passed data as a ``pandas.DataFrame``.

    Parameters
    ----------
    df_or_path : str, numpy.ndarray, pandas.Series or pandas.DataFrame
        Either an already loaded data object or a path to a file. For a
        path, the reader is chosen from the (case-insensitive) extension:
        ``.csv`` -> ``pandas.read_csv``; ``.xls``, ``.xlsx``, ``.odf``,
        ``.ods`` -> ``pandas.read_excel``; ``.asc``, ``.dat``, ``.mat``,
        ``.txt`` -> ``pandas.read_table`` (lines starting with ``#`` are
        treated as comments).

    Returns
    -------
    pandas.DataFrame
        A DataFrame is returned unchanged; arrays and Series are wrapped
        into a new DataFrame; file paths are read from disk.

    Raises
    ------
    AttributeError
        If the path has an unsupported extension, or if the object is of
        an unsupported type.
    """
    if isinstance(df_or_path, str):
        # Derive the extension from the argument itself, not from the
        # module-level kwargs, so the function works for any caller.
        _, ext = os.path.splitext(df_or_path)
        ext = ext.lower()

        # check some endings
        if ext == '.csv':
            data = pd.read_csv(df_or_path)
        elif ext in ('.xls', '.xlsx', '.odf', '.ods'):
            data = pd.read_excel(df_or_path)
        elif ext in ('.asc', '.dat', '.mat', '.txt'):
            data = pd.read_table(df_or_path, comment='#')
        else:
            raise AttributeError('Got a file path, but the extension is not (yet) supported.')
    elif isinstance(df_or_path, (np.ndarray, pd.Series)):
        data = pd.DataFrame(df_or_path)
    elif isinstance(df_or_path, pd.DataFrame):
        data = df_or_path
    else:
        raise AttributeError(f"The passed data was of type {type(df_or_path)} which is not supported.")
    return data


# switch the tool
if toolname == 'foobar':
# RUN the tool here and create the output in /out
with open('/out/STDOUT.log', 'w') as f:
f.write('This toolbox does not include any tool. Did you run the template?\n')
if toolname == 'profile':
    # The data parameter may still be a plain string (a file path) or an
    # in-memory object; load_data normalizes it to a DataFrame.
    df = load_data(kwargs['data'])
    del kwargs['data']

    profile = ProfileReport(df, title="Dataset Report")

    # generate the output
    profile.to_file('/out/report.html')
    js = profile.to_json()

    # The file must be opened for writing: the default mode is read-only,
    # which makes f.write() fail and leaves no JSON report behind.
    with open('/out/report.json', 'w') as f:
        f.write(js)


# In any other case, it was not clear which tool to run
else:
Expand Down
36 changes: 14 additions & 22 deletions src/tool.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,17 @@
tools:
foobar:
title: Foo Bar
description: A dummy tool to exemplify the YAML file
version: 0.1
profile:
title: Dataset Profile
description: |
Create an HTML or JSON profiling report for any kind of tabular data.
This image is based on Python and uses pandas-profiling: https://pypi.org/project/pandas-profiling/
version: 0.2
parameters:
foo_int:
type: integer
foo_float:
type: float
foo_string:
type: string
foo_enum:
type: enum
values:
- foo
- bar
- baz
foo_array:
type: integer
array: true
foo_matrix:
type: file
foo_csv:
data:
type: file
description: |
A CSV file containing the tabular data. From Python SDK, you can pass any pandas.DataFrame.
It is also possible to pass other file formats, but this is still experimental.
options:
type: dict
description: Experimental. Do not use.

0 comments on commit cf57d8e

Please sign in to comment.