Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP - Runpod integration #522

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 118 additions & 21 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,120 @@ jobs:
# persistent data location
root = "/runner/build/containerd"

# - name: Docker meta
# id: meta
# uses: docker/metadata-action@v5
# with:
# images: |
# ghcr.io/predibase/lorax
# tags: |
# type=semver,pattern={{version}}
# type=semver,pattern={{major}}.{{minor}}
# type=sha,prefix=,suffix=,format=short
# type=raw,value=main,enable=${{ github.ref == 'refs/heads/main' }}

# - name: Create a hash from tags
# env:
# tags: ${{ steps.meta.outputs.tags }}
# id: vars
# run: |
# tag_hash=$(echo -n "$tags" | md5sum | awk '{print $1}')
# echo "tag_hash=$tag_hash" >> $GITHUB_OUTPUT
# echo "cache_dir=/runner/build/images/cache" >> $GITHUB_OUTPUT
# echo "image_dir=/runner/build/images" >> $GITHUB_OUTPUT
# echo "image_path=/runner/build/images/lorax" >> $GITHUB_OUTPUT

# - name: Create and update image/cache directory
# env:
# image_dir: ${{ steps.vars.outputs.image_dir }}
# cache_dir: ${{ steps.vars.outputs.cache_dir }}
# run: |
# sudo mkdir -p $image_dir
# sudo chown ubuntu:ubuntu $image_dir

# sudo mkdir -p $cache_dir
# sudo chown ubuntu:ubuntu $cache_dir

# - name: Export Docker image as OCI
# uses: docker/build-push-action@v5
# with:
# context: .
# file: ./Dockerfile # Path to your Dockerfile
# push: false
# tags: ${{ steps.meta.outputs.tags }}
# outputs: type=oci,compression=gzip,dest=${{ steps.vars.outputs.image_path }}-${{ steps.vars.outputs.tag_hash }}.tar.gz
# cache-from: type=local,src=${{ steps.vars.outputs.cache_dir }}
# cache-to: type=local,mode=max,image-manifest=true,oci-mediatypes=true,dest=${{ steps.vars.outputs.cache_dir }}

# - name: Import image in containerd
# env:
# tag_hash: ${{ steps.vars.outputs.tag_hash }}
# image_path: ${{ steps.vars.outputs.image_path }}
# run: |
# echo "Importing $image_path-$tag_hash to Containerd"
# sudo ctr i import --no-unpack --all-platforms --digests $image_path-$tag_hash.tar.gz

# - name: Log in to GitHub Container Registry
# uses: docker/login-action@v1
# with:
# registry: ghcr.io
# username: ${{ github.repository_owner }}
# password: ${{ secrets.GHCR_PAT }}

# - name: Push image with containerd
# env:
# tags: ${{ steps.meta.outputs.tags }}
# run: |
# for tag in $tags
# do
# echo "Pushing $tag to GHCR"
# sudo ctr i push --user "${{ github.repository_owner }}:${{ secrets.GHCR_PAT }}" $tag
# done

# - name: Create and push soci index
# env:
# tags: ${{ steps.meta.outputs.tags }}
# run: |
# export SOCI_PATH=$HOME/.soci/soci
# for tag in $tags
# do
# echo "Creating soci index for $tag"
# sudo $SOCI_PATH create $tag
# echo "Pushing soci index for $tag"
# sudo $SOCI_PATH push --user ${{ github.repository_owner }}:${{ secrets.GHCR_PAT }} $tag
# done

# - name: Prune older images
# env:
# tag_hash: ${{ steps.vars.outputs.tag_hash }}
# image_path: ${{ steps.vars.outputs.image_path }}
# run: |
# # Delete images older than a day from docker store
# docker image prune -a -f --filter "until=24h"

# # Delete the on disk copy
# rm -rf "$image_path-$tag_hash.tar.gz"

# # Delete the SHA image(s) from containerd store
# sudo ctr i rm $(sudo ctr i ls -q)

#### new build test
- name: Docker meta
id: meta
id: meta1
uses: docker/metadata-action@v5
with:
images: |
ghcr.io/predibase/lorax
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha,prefix=,suffix=,format=short
type=sha,prefix=runpod-,suffix=,format=short
type=raw,value=main,enable=${{ github.ref == 'refs/heads/main' }}

- name: Create a hash from tags
env:
tags: ${{ steps.meta.outputs.tags }}
id: vars
tags: ${{ steps.meta1.outputs.tags }}
id: vars1
run: |
tag_hash=$(echo -n "$tags" | md5sum | awk '{print $1}')
echo "tag_hash=$tag_hash" >> $GITHUB_OUTPUT
Expand All @@ -80,8 +178,8 @@ jobs:

- name: Create and update image/cache directory
env:
image_dir: ${{ steps.vars.outputs.image_dir }}
cache_dir: ${{ steps.vars.outputs.cache_dir }}
image_dir: ${{ steps.vars1.outputs.image_dir }}
cache_dir: ${{ steps.vars1.outputs.cache_dir }}
run: |
sudo mkdir -p $image_dir
sudo chown ubuntu:ubuntu $image_dir
Expand All @@ -92,18 +190,18 @@ jobs:
- name: Export Docker image as OCI
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile # Path to your Dockerfile
context: ./runpod
file: ./runpod/Dockerfile # Path to your Dockerfile
push: false
tags: ${{ steps.meta.outputs.tags }}
outputs: type=oci,compression=gzip,dest=${{ steps.vars.outputs.image_path }}-${{ steps.vars.outputs.tag_hash }}.tar.gz
cache-from: type=local,src=${{ steps.vars.outputs.cache_dir }}
cache-to: type=local,mode=max,image-manifest=true,oci-mediatypes=true,dest=${{ steps.vars.outputs.cache_dir }}
tags: ${{ steps.meta1.outputs.tags }}
outputs: type=oci,compression=gzip,dest=${{ steps.vars1.outputs.image_path }}-${{ steps.vars1.outputs.tag_hash }}.tar.gz
cache-from: type=local,src=${{ steps.vars1.outputs.cache_dir }}
cache-to: type=local,mode=max,image-manifest=true,oci-mediatypes=true,dest=${{ steps.vars1.outputs.cache_dir }}

- name: Import image in containerd
env:
tag_hash: ${{ steps.vars.outputs.tag_hash }}
image_path: ${{ steps.vars.outputs.image_path }}
tag_hash: ${{ steps.vars1.outputs.tag_hash }}
image_path: ${{ steps.vars1.outputs.image_path }}
run: |
echo "Importing $image_path-$tag_hash to Containerd"
sudo ctr i import --no-unpack --all-platforms --digests $image_path-$tag_hash.tar.gz
Expand All @@ -117,17 +215,17 @@ jobs:

- name: Push image with containerd
env:
tags: ${{ steps.meta.outputs.tags }}
tags: ${{ steps.meta1.outputs.tags }}
run: |
for tag in $tags
do
echo "Pushing $tag to GHCR"
sudo ctr i push --user "${{ github.repository_owner }}:${{ secrets.GHCR_PAT }}" $tag
done

- name: Create and push soci index
env:
tags: ${{ steps.meta.outputs.tags }}
tags: ${{ steps.meta1.outputs.tags }}
run: |
export SOCI_PATH=$HOME/.soci/soci
for tag in $tags
Expand All @@ -140,8 +238,8 @@ jobs:

- name: Prune older images
env:
tag_hash: ${{ steps.vars.outputs.tag_hash }}
image_path: ${{ steps.vars.outputs.image_path }}
tag_hash: ${{ steps.vars1.outputs.tag_hash }}
image_path: ${{ steps.vars1.outputs.image_path }}
run: |
# Delete images older than a day from docker store
docker image prune -a -f --filter "until=24h"
Expand All @@ -151,4 +249,3 @@ jobs:

# Delete the SHA image(s) from containerd store
sudo ctr i rm $(sudo ctr i ls -q)

75 changes: 75 additions & 0 deletions runpod/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Base image
# TODO change the lorax base image
FROM ghcr.io/predibase/lorax:0.10.0
ENV DEBIAN_FRONTEND=noninteractive

# Set the working directory
WORKDIR /

# Update and upgrade the system packages (Worker Template)
COPY builder/setup.sh /setup.sh
RUN /bin/bash /setup.sh && \
    rm /setup.sh

# Install Python dependencies (Worker Template)
# COPY builder/requirements.txt /requirements.txt
RUN python3 -m pip install --upgrade pip && \
    python3 -m pip install runpod

# Add src files (Worker Template)
# COPY (not ADD) is the recommended instruction for plain local files/dirs;
# ADD's extra tar/URL semantics are not needed here.
COPY src .

# Whether to download the model into /runpod-volume or not.
ARG DOWNLOAD_MODEL=
ENV DOWNLOAD_MODEL=$DOWNLOAD_MODEL

# Set environment variables
ARG HF_MODEL_ID=
ENV HF_MODEL_ID=$HF_MODEL_ID

ARG HF_MODEL_REVISION=
ENV HF_MODEL_REVISION=$HF_MODEL_REVISION

ARG SM_NUM_GPUS=
ENV SM_NUM_GPUS=$SM_NUM_GPUS

ARG HF_MODEL_QUANTIZE=
ENV HF_MODEL_QUANTIZE=$HF_MODEL_QUANTIZE

ARG HF_MODEL_TRUST_REMOTE_CODE=
ENV HF_MODEL_TRUST_REMOTE_CODE=$HF_MODEL_TRUST_REMOTE_CODE

ARG MODEL_BASE_PATH="/runpod-volume/"
ENV MODEL_BASE_PATH=$MODEL_BASE_PATH

# SECURITY NOTE(review): passing the HF token as a build ARG persists it in
# the image metadata/history. Prefer a BuildKit secret mount
# (RUN --mount=type=secret,...) or injecting the token at run time.
ARG HUGGING_FACE_HUB_TOKEN=
ENV HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN

ARG HF_MAX_TOTAL_TOKENS=
ENV HF_MAX_TOTAL_TOKENS=$HF_MAX_TOTAL_TOKENS

ARG HF_MAX_INPUT_LENGTH=
ENV HF_MAX_INPUT_LENGTH=$HF_MAX_INPUT_LENGTH

ARG HF_MAX_BATCH_TOTAL_TOKENS=
ENV HF_MAX_BATCH_TOTAL_TOKENS=$HF_MAX_BATCH_TOTAL_TOKENS

ARG HF_MAX_BATCH_PREFILL_TOKENS=
ENV HF_MAX_BATCH_PREFILL_TOKENS=$HF_MAX_BATCH_PREFILL_TOKENS

# Prepare the hugging face directories for caching datasets, models, and more.
ENV HF_DATASETS_CACHE="/runpod-volume/huggingface-cache/datasets"
ENV HUGGINGFACE_HUB_CACHE="/runpod-volume/huggingface-cache/hub"
ENV TRANSFORMERS_CACHE="/runpod-volume/huggingface-cache/hub"

# Conditionally download the model weights based on DOWNLOAD_MODEL
RUN if [ "$DOWNLOAD_MODEL" = "1" ]; then \
        lorax-server download-weights $HF_MODEL_ID; \
    fi

# Quick temporary updates.
# Single RUN keeps the pip bootstrap + client installs in one layer.
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 && \
    python3.10 -m pip install lorax-client openai

ENTRYPOINT ["./entrypoint.sh"]
23 changes: 23 additions & 0 deletions runpod/builder/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Provision the base image: system packages plus Python 3.10 and pip.
# Run once at build time (see runpod/Dockerfile), then deleted.

# Stop script on error
set -e

# Update System
apt-get update && apt-get upgrade -y

# Install System Dependencies
# - software-properties-common: provides add-apt-repository (needed below)
# - curl, git: tooling used by later build steps
# - openssh-server: for ssh access and web terminal
apt-get install -y --no-install-recommends software-properties-common curl git openssh-server

# Install Python 3.10 from the deadsnakes PPA
add-apt-repository ppa:deadsnakes/ppa -y
apt-get update && apt-get install -y --no-install-recommends python3.10 python3.10-dev python3.10-distutils
# NOTE(review): repointing /usr/bin/python3 can break apt's own python
# tooling on some Ubuntu bases — confirm against the lorax base image.
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip for Python 3.10
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py

# Clean up: drop apt caches AND the downloaded get-pip.py bootstrap script
# (previously left behind, bloating the image layer).
rm -f get-pip.py
apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
45 changes: 45 additions & 0 deletions runpod/src/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
# Container entrypoint: map RunPod/SageMaker-style HF_* environment variables
# onto the names lorax-launcher reads, start the launcher in the background,
# then run the RunPod handler in the foreground.

# Optional model selection: if HF_MODEL_ID is set it overrides the baked-in
# default model used in the launcher invocation below.
if [[ -n "${HF_MODEL_ID}" ]]; then
    export MODEL_ID="${HF_MODEL_ID}"
fi

if [[ -n "${HF_MODEL_REVISION}" ]]; then
    export REVISION="${HF_MODEL_REVISION}"
fi

if [[ -n "${SM_NUM_GPUS}" ]]; then
    export NUM_SHARD="${SM_NUM_GPUS}"
fi

if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then
    export QUANTIZE="${HF_MODEL_QUANTIZE}"
fi

if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then
    export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}"
fi

if [[ -n "${HF_MAX_TOTAL_TOKENS}" ]]; then
    export MAX_TOTAL_TOKENS="${HF_MAX_TOTAL_TOKENS}"
fi

if [[ -n "${HF_MAX_INPUT_LENGTH}" ]]; then
    export MAX_INPUT_LENGTH="${HF_MAX_INPUT_LENGTH}"
fi

if [[ -n "${HF_MAX_BATCH_TOTAL_TOKENS}" ]]; then
    export MAX_BATCH_TOTAL_TOKENS="${HF_MAX_BATCH_TOTAL_TOKENS}"
fi

if [[ -n "${HF_MAX_BATCH_PREFILL_TOKENS}" ]]; then
    export MAX_BATCH_PREFILL_TOKENS="${HF_MAX_BATCH_PREFILL_TOKENS}"
fi

# Start the text generation server.
# BUGFIX(review): the flags below were previously hard-coded, so every export
# above was silently ignored by the launcher. Each flag now takes its value
# from the corresponding env var, falling back to the original hard-coded
# default — behavior is unchanged when no HF_* overrides are supplied.
nohup lorax-launcher \
    --port 8080 \
    --model-id "${MODEL_ID:-predibase/Meta-Llama-3-8B-Instruct-dequantized}" \
    --adapter-source hub \
    --default-adapter-source pbase \
    --max-batch-prefill-tokens "${MAX_BATCH_PREFILL_TOKENS:-32768}" \
    --max-total-tokens "${MAX_TOTAL_TOKENS:-8192}" \
    --max-input-length "${MAX_INPUT_LENGTH:-8191}" \
    --max-concurrent-requests 1024 &

# Start the handler using python 3.10
python3.10 -u /handler.py
Loading