Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add replicate web demo and API #265

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Exclude Git files
.git
.github
.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv
pretrained_models/
output*.mp4
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,7 @@ pretrained_models/
# Secret files
hostfile
gradio_cached_examples/

# cog demo files
output*.mp4
.cog
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
<a href="https://twitter.com/yangyou1991/status/1769411544083996787?s=61&t=jT0Dsx2d-MS5vS9rNM5e5g"><img src="https://img.shields.io/badge/Twitter-Discuss-blue?logo=twitter&amp"></a>
<a href="https://raw.githubusercontent.com/hpcaitech/public_assets/main/colossalai/img/WeChat.png"><img src="https://img.shields.io/badge/微信-小助手加群-green?logo=wechat&amp"></a>
<a href="https://hpc-ai.com/blog/open-sora-v1.0"><img src="https://img.shields.io/badge/Open_Sora-Blog-blue"></a>
<a href="https://replicate.com/jd7h/open-sora-512"><img src="https://replicate.com/jd7h/open-sora-512/badge"></a>

</div>

## Open-Sora: Democratizing Efficient Video Production for All
Expand Down
51 changes: 51 additions & 0 deletions cog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
# set to true if your model requires a GPU
gpu: true
cuda: "12.1"

# a list of ubuntu apt packages to install
system_packages:
- "libgl1-mesa-glx"
# - "libglib2.0-0"

# python version in the form '3.11' or '3.11.4'
python_version: "3.10"

# a list of packages in the format <package-name>==<version>
python_packages:
- "torch==2.1.0"
- "torchvision"
- "packaging"
- "ninja"
- "xformers"
- "colossalai"
- "accelerate"
- "diffusers"
- "ftfy"
- "gdown"
- "mmengine"
- "pre-commit"
- "pyav"
- "tensorboard"
- "timm"
- "tqdm"
- "transformers"
- "wandb"
# - "numpy==1.19.4"
# - "torch==1.8.0"
# - "torchvision==0.9.0"

# commands run after the environment is set up
run:
- 'pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git'
- 'pip install --no-build-isolation flash-attn'
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.6.2/pget_Linux_x86_64" && chmod +x /usr/local/bin/pget

# - "echo env is ready!"
# - "echo another command if needed"

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
168 changes: 168 additions & 0 deletions predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import os
import random
import subprocess
import shutil
import time
from typing import List

import numpy as np
import torch
from cog import BasePredictor, Input, Path
from mmengine.config import Config
from mmengine.runner import set_random_seed

from opensora.datasets import save_sample
from opensora.registry import MODELS, SCHEDULERS, build_module
from opensora.utils.config_utils import merge_args
from opensora.utils.misc import to_torch_dtype

# Largest int32 value; used as the inclusive upper bound when a random seed is drawn.
MAX_SEED = np.iinfo(np.int32).max

# Archive that is downloaded when WEIGHTS_FOLDER does not exist yet.
MODEL_URL = "https://weights.replicate.delivery/default/open-sora/opensora.tar"
# Local directory whose presence is checked before downloading the weights.
WEIGHTS_FOLDER = "pretrained_models"

def download_weights(url, dest, extract=True):
    """Download *url* to *dest* by invoking the ``pget`` command line tool.

    Progress information and the elapsed wall-clock time are printed to stdout.

    Args:
        url: Remote address handed to ``pget``.
        dest: Local destination handed to ``pget``.
        extract: When truthy, the ``-x`` flag is added to the ``pget`` call
            (presumably unpacking the archive into *dest* -- see pget docs).

    Raises:
        subprocess.CalledProcessError: If ``pget`` exits with a non-zero status.
    """
    began = time.time()
    print("downloading url: ", url)
    print("downloading to: ", dest)

    command = ["pget", "-x"] if extract else ["pget"]
    command += [url, dest]
    subprocess.check_call(command, close_fds=False)

    print("downloading took: ", time.time() - began)

def cog_config():
    """Build the inference configuration used by the Cog predictor.

    The values are taken from ``16x512x512.py``. The model's
    ``from_pretrained`` entry is only the placeholder ``"PRETRAINED_MODEL"``
    here; the caller is expected to supply the real checkpoint path.

    Returns:
        A ``Config`` holding the general sampling settings plus the ``model``,
        ``vae``, ``text_encoder`` and ``scheduler`` sub-configurations.
    """
    # taken from 16x512x512.py
    general_settings = {
        "num_frames": 16,
        "fps": 24 // 3,
        "image_size": (512, 512),
        "dtype": "fp16",
        "batch_size": 2,
        "seed": 42,
        "prompt_path": "./assets/texts/t2v_samples.txt",
        "save_dir": "./outputs/samples/",
    }
    cfg = Config(general_settings)

    cfg.model = {
        "type": "STDiT-XL/2",
        "space_scale": 1.0,
        "time_scale": 1.0,
        "enable_flashattn": True,
        "enable_layernorm_kernel": True,
        "from_pretrained": "PRETRAINED_MODEL",
    }
    cfg.vae = {
        "type": "VideoAutoencoderKL",
        "from_pretrained": "stabilityai/sd-vae-ft-ema",
        "micro_batch_size": 128,
    }
    cfg.text_encoder = {
        "type": "t5",
        "from_pretrained": "./pretrained_models/t5_ckpts",
        "model_max_length": 120,
    }
    cfg.scheduler = {
        "type": "iddpm",
        "num_sampling_steps": 100,
        "cfg_scale": 7.0,
    }
    return cfg


class Predictor(BasePredictor):
    """Cog predictor that generates a video file from a text prompt."""

    def setup(self) -> None:
        """Prepare all state that is shared between predictions.

        Installs the package from the working directory, downloads the
        weights when ``WEIGHTS_FOLDER`` is absent, merges the configuration,
        and builds the VAE, text encoder, diffusion model and scheduler.
        """
        # Install Open-Sora from the current working directory.
        subprocess.check_call(["pip", "install", "-q", "."])

        # Fetch the weights only when the folder is not present yet.
        weights_missing = not os.path.exists(WEIGHTS_FOLDER)
        if weights_missing:
            download_weights(MODEL_URL, WEIGHTS_FOLDER, extract=True)

        # Stand-in for the command line arguments from opensora.utils.config_utils.
        cli_args = Config(
            {
                "seed": 42,
                "ckpt_path": "pretrained_models/Open-Sora/OpenSora-v1-HQ-16x512x512.pth",
                "batch-size": None,
                "prompt-path": None,
                "save-dir": None,
                "num-sampling-steps": None,
                "cfg_scale": None,
            }
        )
        self.cfg = merge_args(cog_config(), args=cli_args, training=False)
        cfg = self.cfg

        # Inference only: no autograd, TF32 allowed for matmul and cuDNN.
        torch.set_grad_enabled(False)
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.dtype = to_torch_dtype(cfg.dtype)

        video_shape = (cfg.num_frames, *cfg.image_size)
        self.vae = build_module(cfg.vae, MODELS)
        self.latent_size = self.vae.get_latent_size(video_shape)
        # T5 must be fp32, so no dtype is passed to the text encoder.
        self.text_encoder = build_module(cfg.text_encoder, MODELS, device=self.device)
        self.model = build_module(
            cfg.model,
            MODELS,
            input_size=self.latent_size,
            in_channels=self.vae.out_channels,
            caption_channels=self.text_encoder.output_dim,
            model_max_length=self.text_encoder.model_max_length,
            dtype=self.dtype,
            enable_sequence_parallelism=False,
        )
        # hack for classifier-free guidance
        self.text_encoder.y_embedder = self.model.y_embedder

        self.vae = self.vae.to(self.device, self.dtype).eval()
        self.model = self.model.to(self.device, self.dtype).eval()
        self.scheduler = build_module(cfg.scheduler, SCHEDULERS)

        # Extra keyword arguments forwarded to the sampler on every prediction.
        self.model_args = {}
        if cfg.multi_resolution:
            size = cfg.image_size
            tensor_opts = {"device": self.device, "dtype": self.dtype}
            height_width = torch.tensor([size], **tensor_opts).repeat(cfg.batch_size, 1)
            aspect_ratio = torch.tensor([[size[0] / size[1]]], **tensor_opts).repeat(cfg.batch_size, 1)
            self.model_args["data_info"] = dict(ar=aspect_ratio, hw=height_width)

    def predict(
        self,
        prompt: str = Input(description="Prompt for the video"),
        seed: int = Input(description="Seed. Leave blank to randomise", default=None),
    ) -> List[Path]:
        """Generate one video for *prompt* and return its path.

        The configured output directory is wiped and recreated at the start of
        every call, so only the most recent result is kept on disk.

        Args:
            prompt: Text description of the video.
            seed: Random seed; when ``None`` a value in ``[0, MAX_SEED]`` is drawn.

        Returns:
            A single-element list with the path of the written ``.mp4`` file.
        """
        # Start from an empty output directory.
        out_dir = self.cfg.save_dir
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir, exist_ok=True)

        # Draw a seed when the caller did not provide one.
        if seed is None:
            seed = random.randint(0, MAX_SEED)
        print(f"Using seed {seed}...")
        set_random_seed(seed=seed)

        latent_shape = (self.vae.out_channels, *self.latent_size)
        latents = self.scheduler.sample(
            self.model,
            self.text_encoder,
            z_size=latent_shape,
            prompts=[prompt],
            device=self.device,
            additional_args=self.model_args,
        )
        videos = self.vae.decode(latents.to(self.dtype))

        # save_sample writes the file to {save_path}.mp4
        save_path = os.path.join(out_dir, "output")
        save_sample(videos[0], fps=self.cfg.fps, save_path=save_path)

        video_file = Path(save_path + ".mp4")
        return [video_file]