diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 00000000..7fe36854 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,24 @@ +name: docs +on: + push: + branches: + - main + - docs + +permissions: + contents: write + +jobs: + deploy-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - uses: actions/cache@v3 + with: + key: ${{ github.ref }} + path: .cache + - run: pip install mkdocs-material + - run: mkdocs gh-deploy --force diff --git a/.gitignore b/.gitignore index 2bc61bf0..e538d8ff 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,5 @@ cython_debug/ #.idea/ node_modules + +.DS_Store diff --git a/.vscode/settings.json b/.vscode/settings.json index 1be6a53a..28c73cec 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,5 +3,15 @@ "tests" ], "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true + "python.testing.pytestEnabled": true, + "yaml.schemas": { + "https://squidfunk.github.io/mkdocs-material/schema.json": "mkdocs.yml" + }, + "yaml.customTags": [ + "!ENV scalar", + "!ENV sequence", + "tag:yaml.org,2002:python/name:materialx.emoji.to_svg", + "tag:yaml.org,2002:python/name:materialx.emoji.twemoji", + "tag:yaml.org,2002:python/name:pymdownx.superfences.fence_code_format" + ] } \ No newline at end of file diff --git a/README.md b/README.md index b156c333..49fccd3b 100644 --- a/README.md +++ b/README.md @@ -26,11 +26,9 @@

Python task scheduler with a user-friendly web UI
- Explore the docs » + Official website »

- View Demo - ยท Report Bug ยท Request Feature @@ -38,57 +36,29 @@ - - -

- Table of Contents -
    -
  1. - About The Project - -
  2. -
  3. - Getting Started - -
  4. -
  5. Usage
  6. -
  7. Roadmap
  8. -
  9. Contributing
  10. -
  11. License
  12. -
  13. Contact
  14. -
  15. Acknowledgments
  16. -
-
- - - ## About The Project -![Mario Pype Screen Shot](docs/screenshot.png) - Mario Pype is a simple task scheduler for Python with a web UI and a REST API, if you need to run and monitor recurring python scripts then it's the right tool for you! +![Mario Pype Screen Shot](docs/assets/images/screenshot.png) + > This project is at its beginning, so it can be shaped and improved with your feedback and help! If you like it, star it ๐ŸŒŸ! If you want a feature or find a bug, open an issue. -Features: -- โฐ Task scheduing based on APScheduler (supports Interval, Cron and Date triggers) -- ๐Ÿ’ป Built-in Web interface, no HTML/JS/CSS coding required -- ๐Ÿ‘ฉโ€๐Ÿ’ป๐Ÿ Pipelines and tasks are defined in pure Python -- ๐Ÿ‘Š Pipelines can be run manually from the web UI -- ๐Ÿ” Debug your tasks exploring logs and output data -- ๐Ÿ“ฉ Monitor tasks executions and get alerted if something goes wrong -- ๐Ÿ’ฃ Use the REST API for advanced integrations -- โœจ A lot more features are coming, check the [open issues](https://github.com/lucafaggianelli/mario-pype/issues)! +## Features +* โฐ Task scheduling based on [APScheduler](https://github.com/agronholm/apscheduler) (supports Interval, Cron and Date triggers) +* ๐Ÿ’ป Built-in Web interface, no HTML/JS/CSS coding required +* ๐Ÿ‘ฉโ€๐Ÿ’ป๐Ÿ Pipelines and tasks are defined in pure Python +* ๐ŸŽ›๏ธ Pipelines can be parametrized via [Pydantic](https://docs.pydantic.dev/) +* ๐Ÿ‘‰ Pipelines can be run manually from the web UI +* ๐Ÿ” Secured via OAuth2 +* ๐Ÿ” Debug each run exploring logs and output data +* ๐Ÿ“ฉ Monitor the pipelines and get alerted if something goes wrong +* ๐Ÿ’ฃ Use the REST API for advanced integrations When you shouldn't use it: - you need a lot of scalability and you want to run on a distributed system @@ -110,6 +80,9 @@ When you shouldn't use it: ## Getting Started +Check the ๐Ÿ‘‰ [official website](https://lucafaggianelli.github.io/mario-pype/) +for more detailed info! 
+ ### Prerequisites To run Mario Pype you only need Python (v3.8 or later), if you don't have it installed yet, go @@ -160,7 +133,8 @@ from apscheduler.triggers.interval import IntervalTrigger from mario import Mario, task, get_logger, Pipeline, Trigger -dummy_pipeline = Pipeline( +sales_pipeline = Pipeline( + id="sales_pipeline", tasks = [fetch_raw_sales_data], triggers = [ Trigger( @@ -176,7 +150,7 @@ dummy_pipeline = Pipeline( A *Task* is the base block in Mario Pype and it's just a Python function that performs an action. -This is the Task `fetch_raw_sales_data` used in the `DummyPipeline` pipeline ... it doesn't do much, +This is the Task `fetch_raw_sales_data` used in the `sales_pipeline` pipeline ... it doesn't do much, but it showcase the basics: ```py diff --git a/docs/assets/images/mario-pipe-flower.png b/docs/assets/images/mario-pipe-flower.png new file mode 100644 index 00000000..ec718a7c Binary files /dev/null and b/docs/assets/images/mario-pipe-flower.png differ diff --git a/docs/screenshot.png b/docs/assets/images/screenshot.png similarity index 100% rename from docs/screenshot.png rename to docs/assets/images/screenshot.png diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css new file mode 100644 index 00000000..6027d05f --- /dev/null +++ b/docs/assets/stylesheets/extra.css @@ -0,0 +1,13 @@ +[data-md-toggle="search"]:not(:checked) ~ .md-header .md-search__form::after { + position: absolute; + top: .3rem; + right: .3rem; + display: block; + padding: .1rem .4rem; + color: #ffffffb3; + font-weight: bold; + font-size: .8rem; + border: .05rem solid #ffffff3b; + border-radius: .1rem; + content: "/"; +} diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 00000000..05c4c6aa --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,187 @@ +MarioPype is configurable via environmental variables, a YAML file +or even better via a combination of the 2. + +!!! info "Why a hybrid configuration?" 
+ + An entire configuration can be quite large so storing it as environmental + variables can be quite hard to maintain, moreover some parts of the + configuration should be stored together with the code as they are part + of the system and some parts of it are secret so you need env vars + +Create a configuration file in the root of your project named `mario.config.yaml` +(or `mario.config.yml` if you prefer) and set the values you need, you should +commit this file to the git repo: + +```yaml title="mario.config.yaml" +frontend_url: https://pipelines.example.com + +auth: + client_id: $GOOGLE_CLIENT_ID + client_secret: $GOOGLE_CLIENT_SECRET + server_metadata_url: https://accounts.google.com/.well-known/openid-configuration + +notifications: + - pipeline_status: + - failed + channels: + - $GMAIL_ACCOUNT + - $MSTEAMS_WEBHOOK +``` + +Now define the secrets as environmental variables in a `.env` file, +in your shell or in your hosting environment. + + +!!! tip + + By default, MarioPype will load any `.env` found in your project root. + +!!! Warning + + You shouldn't commit the `.env` file as it contains secrets! + +```shell title=".env" +# Auth +GOOGLE_CLIENT_ID="ABC123" +GOOGLE_CLIENT_SECRET="DEF456" + +# Notifications +GMAIL_ACCOUNT=mailto://myuser:mypass@gmail.com +MSTEAMS_WEBHOOK=msteams://TokenA/TokenB/TokenC/ +``` + +## System + +!!! tip + + If you're running MarioPype locally, in most cases you don't need to change + these settings + +### `database_url` + +The SQLite DB URI, by default `sqlite:///./mario.db` + +### `server_url` + +The URL of the backend, by default `http://localhost:8000`. + +Change it if running in production. + +### `frontend_url` + +The URL of the frontend, by default it is the same as the backend, +change it if the frontend is served at a different URL, for example +during the frontend development. + +## Notifications + +MarioPype can send notifications after a pipeline has run based on the status +of the run itself (success, failure, etc.). 
+ +The notifications configuration can be defined in the YAML +file as a list of [`NotificationRule`](#notificationrule)s: + +```yaml title="mario.config.yaml" +notifications: + # Send notifications only if the pipelines failed + - pipeline_status: + - failed + channels: + # Send them to my gmail address (from my address itself) + # Better to use an env var here + - mailto://myuser:mypass@gmail.com + # Send notifications only if the pipelines succeeded or was cancelled + - pipeline_status: + - completed + - cancelled + channels: + # Send them to a MS Teams channel + # Better to use an env var here + - msteams://mychanneltoken +``` + +### `NotificationRule` + +A notification rule defines when to send notifications and to whom. + +#### `pipeline_status` + +A list of 1 or more pipeline run statuses among: + + * `completed` + * `failed` + * `cancelled` + +#### `channels` + +A list of 1 or more recipients where to send the notifications. + +A channel is an *Apprise* URI string that defines an email address or a MS Teams +channel, for example: + +* **Email** mailto://myuser:mypass@gmail.com +* **MS Teams** msteams://TokenA/TokenB/TokenC/ +* **AWS SES** ses://user@domain/AccessKeyID/AccessSecretKey/RegionName/email1/ + +Behind the scenes MarioPype uses [Apprise](https://github.com/caronc/apprise), +a library to send notifications to many notification providers, so check their +docs for a full list of the available channels. + +## Authentication + +MarioPype has a built-in and ready-to-use authentication system +based on OAuth providers, so you can use your corporate auth system +or Google, GitHub, etc. + +To enable the auth system you just need to configure it. + +!!! 
info "Good to know" + + The auth system is based on [Authlib](https://authlib.org/) + +### `AuthSettings` + +Options available + +#### `client_id` + +An OAuth app client ID + +#### `client_secret` + +An OAuth app client secret + +#### `server_metadata_url` + +This is a special URL that contains information about the OAuth provider +specific endpoints. If your provider doesn't have this URL or you don't +know it, you need to fill in the values for the other URLs: `access_token_url`, +`authorize_url` and `jwks_uri`. + +Here is a table of well-known Metadata URLs: + +| Provider | URL | +| -------- | --- | +| Google | https://accounts.google.com/.well-known/openid-configuration | + +#### `access_token_url` + +#### `authorize_url` + +#### `jwks_uri` + +#### `client_kwargs` + +Additional values to pass to the OAuth client during the auth +process, for example the scope: + +```yaml +auth: + client_kwargs: + scope: openid email profile +``` + +#### `secret_key` + +Secret key used in the backend middleware, this has a dummy default value, +but in production you should define a decent value. diff --git a/docs/create-a-pipeline.md b/docs/create-a-pipeline.md new file mode 100644 index 00000000..45ca723b --- /dev/null +++ b/docs/create-a-pipeline.md @@ -0,0 +1,108 @@ +# Create your first pipeline + +Create a new folder in your project root with +a file named `app.py` (or any name you want) in it, +since in Python, files should live inside a top-level package. + +This should be your folder structure: + +``` { .sh .no-copy } +. +├─ .venv/ # virtual environment folder +└─ src/ + ├─ __init__.py # empty file needed to declare Python modules + └─ app.py # entrypoint to the project +``` + +## Glossary + +Before starting, let's define some naming so there will be no confusion! + +* **Task**: a python function that performs some job, it's the base block for building a pipeline +* **Pipeline**: a sequence of 1 or more *Task*s, a pipeline can be run via a schedule, manually, etc. 
+* **Trigger**: is the entrypoint to run a pipeline, a trigger can be a schedule, a webhook, a button on the web UI, etc. +* **Pipeline Run**: (sometimes simply referred to as *Run*) is the result of running a pipeline + +## Basic pipeline + +A *Pipeline* contains a list of tasks and optionally a list of triggers, +so in your `app.py` add this: + +```py +from datetime import datetime +from random import randint + +from apscheduler.triggers.interval import IntervalTrigger +from mario import Mario, task, get_logger, Pipeline, Trigger + + +sales_pipeline = Pipeline( + id="sales_pipeline", + tasks = [fetch_raw_sales_data], + triggers = [ + Trigger( + id="daily", + name="Daily", + description="Run the pipeline every day", + aps_trigger=IntervalTrigger(days=1), + ) + ], +) +``` + +A *Task* is the base block in Mario Pype and it's just a Python function that +performs an action, e.g. downloading some data from an HTTP API, running a query on a DB, etc. + +This is the task `fetch_raw_sales_data` used in the `sales_pipeline` pipeline ... it doesn't do much, +but it showcases the basics: + +!!! 
info + + Notice how the `@task` decorator is used to declare a task + +```py +@task +async def fetch_raw_sales_data(): + # using MarioPype logger your logs will be stored + # and accessible on the web UI + logger = get_logger() + + logger.debug("Fetching sales data...") + + sales = [ + { + "price": randint(1, 1000), + "store_id": randint(1, 10), + "date": datetime.today(), + "sku": randint(1, 50), + } + for _ in range(50) + ] + + logger.info("Fetched %s sales data rows", len(sales)) + + # Return the results of your task to have it stored + # and accessible on the web UI + return sales +``` + +Finally create a Mario instance and register the pipeline so +MarioPype knows it's there: + +```py +app = Mario() + +app.register_pipeline(sales_pipeline) +``` + +### Run Mario Pype + +Mario Pype is based on FastAPI so you can run it as a normal FastAPI app +via `uvicorn` or another ASGI web server: + +```sh +pip install uvicorn +uvicorn src.app:app --reload +``` + +Now open the page [http://localhost:8000](http://localhost:8000) in your browser and enjoy! diff --git a/docs/deployment.md b/docs/deployment.md new file mode 100644 index 00000000..e69de29b diff --git a/docs/get-started.md b/docs/get-started.md new file mode 100644 index 00000000..78763367 --- /dev/null +++ b/docs/get-started.md @@ -0,0 +1,44 @@ +## Prerequisites + +To run Mario Pype you only need Python (v3.8 or later), if you don't have it installed yet, go +to the [official Python website](https://www.python.org/downloads/), download it +and install it. + +## Installation + +It's a good practice to install dependencies specific to a project in +a dedicated virtual environment for that project. + +Many code editors (IDE) provide their own way to create virtual environments, +otherwise you can use the shell directly by typing the following commands. 
+ +Create a virtual environment: + +```bash +# Run this in your project folder +python -m venv .venv +``` + +Activate it: +```bash +# on Mac/Linux +source .venv/bin/activate +``` + +```sh +# on Win +.venv\Scripts\activate +``` + +Then install the library: + +!!! info + + Mario Pype is not published yet on pypi.org, that's why you need to install it + from git! + +```sh +pip install git+https://github.com/lucafaggianelli/mario-pype +``` + +Now you're ready to write your first pipeline! diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..1383aaa6 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,23 @@ +--- +hide: + - toc +--- + +# Mario Pype + +An ⚖️ open source (MIT license) task scheduler for Python with a web UI and a REST API. + +If you need to run and monitor recurring python scripts then it's the right tool for you! + +## Features +* ⏰ Task scheduling based on [APScheduler](https://github.com/agronholm/apscheduler) (supports Interval, Cron and Date triggers) +* 💻 Built-in Web interface, no HTML/JS/CSS coding required +* 👩‍💻🐍 Pipelines and tasks are defined in pure Python +* 🎛️ Pipelines can be parametrized via [Pydantic](https://docs.pydantic.dev/) +* 👉 Pipelines can be run manually from the web UI +* 🔐 Secured via OAuth2 +* 🔍 Debug each run exploring logs and output data +* 📩 Monitor the pipelines and get alerted if something goes wrong +* 💣 Use the REST API for advanced integrations + +![Mario Pype Screen Shot](assets/images/screenshot.png) diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 00000000..cd412029 --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} + +{% block site_meta %} + {{ super() }} + + + + + + + + + + + + +{% endblock %} \ No newline at end of file diff --git a/docs/overrides/partials/integrations/analytics/plausible.html b/docs/overrides/partials/integrations/analytics/plausible.html new file mode 
100644 index 00000000..791c237a --- /dev/null +++ b/docs/overrides/partials/integrations/analytics/plausible.html @@ -0,0 +1 @@ + diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..e69de29b diff --git a/examples/dummy/dummy_pipeline.py b/examples/dummy/dummy_pipeline.py index d676d1a9..d4a16257 100644 --- a/examples/dummy/dummy_pipeline.py +++ b/examples/dummy/dummy_pipeline.py @@ -17,11 +17,13 @@ class InputParams(BaseModel): @task -async def get_sales_data(data, params: InputParams): +async def get_sales_data(params: InputParams): """Fetch raw sales data by store and SKU""" logger = get_logger() + logger.info("Pipeline called with some_value=%d", params.some_value) + for i in range(10): await sleep(1 + np.random.random() / 2) logger.debug("Iteration %d", i) diff --git a/mario/api/authentication.py b/mario/api/authentication.py index 6216d290..73fc22b8 100644 --- a/mario/api/authentication.py +++ b/mario/api/authentication.py @@ -27,6 +27,7 @@ async def get_current_user_no_auth(request: Request): name="default", client_id=settings.auth.client_id.get_secret_value(), client_secret=settings.auth.client_secret.get_secret_value(), + server_metadata_url=settings.auth.server_metadata_url, access_token_url=settings.auth.access_token_url, authorize_url=settings.auth.authorize_url, jwks_uri=settings.auth.jwks_uri, diff --git a/mario/config.py b/mario/config.py index f15f98b1..2abd44d6 100644 --- a/mario/config.py +++ b/mario/config.py @@ -69,6 +69,7 @@ def settings_file_source(settings: BaseSettings) -> Dict[str, Any]: class AuthSettings(BaseModel): client_id: SecretStr client_secret: SecretStr + server_metadata_url: HttpUrl access_token_url: HttpUrl authorize_url: HttpUrl jwks_uri: HttpUrl @@ -81,7 +82,7 @@ class Settings(BaseSettings): database_url: str = "sqlite:///./mario.db" notifications: Optional[List[NotificationRule]] server_url: Optional[AnyHttpUrl] = "http://localhost:8000" - frontend_url: Optional[AnyHttpUrl] = 
"http://localhost:5173" + frontend_url: Optional[AnyHttpUrl] = "http://localhost:8000" class Config: env_file = BASE_SETTINGS_FOLDER / ".env" diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..ae8efaad --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,68 @@ +site_name: Mario Pype +site_url: https://lucafaggianelli.github.io/mario-pype +copyright: Copyright © 2023 Luca Faggianelli + +repo_url: https://github.com/lucafaggianelli/mario-pype +edit_uri: edit/main/docs/ + +extra_css: + - assets/stylesheets/extra.css + +theme: + name: material + logo: assets/images/mario-pipe-flower.png + favicon: assets/images/mario-pipe-flower.png + + palette: + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: deep purple + accent: cyan + toggle: + icon: material/weather-sunny + name: Switch to dark mode + + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: deep purple + accent: cyan + toggle: + icon: material/weather-night + name: Switch to light mode + + features: + - content.action.view + - content.action.edit + - content.code.copy + - navigation.footer + - navigation.instant + - navigation.top + - toc.follow + + icon: + repo: fontawesome/brands/github + edit: material/pencil + view: material/eye + + custom_dir: docs/overrides + +extra: + analytics: + provider: plausible + property: + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - admonition + - pymdownx.details + - pymdownx.superfences + +nav: + - Welcome: index.md + - Get started: get-started.md + - Your first pipeline: create-a-pipeline.md + - Configuration: configuration.md + - Deployment: deployment.md