[Feature] Support AnimateDiff, a popular text2animation method (#1980)
* first commit for animatediff

* fix lint errors

* modify readme file and add readme_zh-CN.md

* fix some typos in readme

* delete test_animatediff.py

* add some docstring

* fix cross attention for 512*512 animation quality

* fix some initial setting for cpu load

* add unittest samples

* modify unittest codes

* remove duplicated unittest files

* modify unittest codes for minimum memory

* modify test_unet3d resolution for minimum memory unittest

* modify test_unet_blocks3d input resolution for minimum memory unittest

* modify animatediff.py for gradio

* add gradio app for animatediff

* skip test with large memory

* fix environment building

* fix merging conflict

* Add different style ckpt

* fix environment building

* add new motion module

* add prompts for all config files in README

* add image in README

* fix sd ckpt auto downloading

* remove unused import in test code

* align README_zh and README

* fix building error

* delete unused comments

* fix test memory

* fix text_model error for later transformer version

* fix comment copyright

* add animatediff gradio README

* modify some copyright in motion_module.py

* modify README for better test guidance

* fix inference without xformers and mimsave for higher versions of imageio

* fix errors in different versions of imageio

* add train tutorial and pretrained models

* fix some comments in README

* delete personal information

* fix gradio sd selection

* add some tips for running gradio

* add pretrained links

---------

Co-authored-by: rangoliu <[email protected]>
ElliotQi and liuwenran authored Sep 20, 2023
1 parent 9e55603 commit bd7c295
Showing 27 changed files with 6,518 additions and 1 deletion.
221 changes: 221 additions & 0 deletions configs/animatediff/README.md

Large diffs are not rendered by default.

220 changes: 220 additions & 0 deletions configs/animatediff/README_zh-CN.md

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions configs/animatediff/animatediff_Lyriel.py
@@ -0,0 +1,61 @@
# config for model
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
models_path = '/home/AnimateDiff/models/'
randomness = dict(
seed=[
10917152860782582783, 6399018107401806238, 15875751942533906793,
6653196880059936551
],
diff_rank_seed=True)

diffusion_scheduler = dict(
type='DDIMScheduler',
beta_end=0.012,
beta_schedule='linear',
beta_start=0.00085,
num_train_timesteps=1000,
prediction_type='epsilon',
set_alpha_to_one=True,
clip_sample=False,
thresholding=False,
steps_offset=1)

model = dict(
type='AnimateDiff',
vae=dict(
type='AutoencoderKL',
from_pretrained=stable_diffusion_v15_url,
subfolder='vae'),
unet=dict(
type='UNet3DConditionMotionModel',
unet_use_cross_frame_attention=False,
unet_use_temporal_attention=False,
use_motion_module=True,
motion_module_resolutions=[1, 2, 4, 8],
motion_module_mid_block=False,
motion_module_decoder_only=False,
motion_module_type='Vanilla',
motion_module_kwargs=dict(
num_attention_heads=8,
num_transformer_block=1,
attention_block_types=['Temporal_Self', 'Temporal_Self'],
temporal_position_encoding=True,
temporal_position_encoding_max_len=24,
temporal_attention_dim_div=1),
subfolder='unet',
from_pretrained=stable_diffusion_v15_url),
text_encoder=dict(
type='ClipWrapper',
clip_type='huggingface',
pretrained_model_name_or_path=stable_diffusion_v15_url,
subfolder='text_encoder'),
tokenizer=stable_diffusion_v15_url,
scheduler=diffusion_scheduler,
test_scheduler=diffusion_scheduler,
data_preprocessor=dict(type='DataPreprocessor'),
motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
dream_booth_lora_cfg=dict(
type='ToonYou',
path=models_path + 'DreamBooth_LoRA/lyriel_v16.safetensors',
steps=25,
guidance_scale=7.5))
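
A minimal usage sketch (not part of this diff), assuming MMagic's `register_all_modules` helper and `MODELS` registry, and that the checkpoints under `models_path` have been downloaded as described in configs/animatediff/README.md:

# Build the AnimateDiff model from one of the configs above.
from mmengine import Config
from mmagic.registry import MODELS
from mmagic.utils import register_all_modules

register_all_modules()  # register MMagic components with the MMEngine registry

cfg = Config.fromfile('configs/animatediff/animatediff_Lyriel.py')
# Building the model pulls the SD v1.5 weights from
# 'runwayml/stable-diffusion-v1-5' and loads the motion-module and
# DreamBooth LoRA checkpoints from `models_path`.
animatediff = MODELS.build(cfg.model).cuda().eval()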
62 changes: 62 additions & 0 deletions configs/animatediff/animatediff_MajicMix.py
@@ -0,0 +1,62 @@
# config for model
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
models_path = '/home/AnimateDiff/models/'
randomness = dict(
seed=[
1572448948722921032, 1099474677988590681, 6488833139725635347,
18339859844376517918
],
diff_rank_seed=True)

diffusion_scheduler = dict(
type='DDIMScheduler',
beta_end=0.012,
beta_schedule='linear',
beta_start=0.00085,
num_train_timesteps=1000,
prediction_type='epsilon',
set_alpha_to_one=True,
clip_sample=False,
thresholding=False,
steps_offset=1)

model = dict(
type='AnimateDiff',
vae=dict(
type='AutoencoderKL',
from_pretrained=stable_diffusion_v15_url,
subfolder='vae'),
unet=dict(
type='UNet3DConditionMotionModel',
unet_use_cross_frame_attention=False,
unet_use_temporal_attention=False,
use_motion_module=True,
motion_module_resolutions=[1, 2, 4, 8],
motion_module_mid_block=False,
motion_module_decoder_only=False,
motion_module_type='Vanilla',
motion_module_kwargs=dict(
num_attention_heads=8,
num_transformer_block=1,
attention_block_types=['Temporal_Self', 'Temporal_Self'],
temporal_position_encoding=True,
temporal_position_encoding_max_len=24,
temporal_attention_dim_div=1),
subfolder='unet',
from_pretrained=stable_diffusion_v15_url),
text_encoder=dict(
type='ClipWrapper',
clip_type='huggingface',
pretrained_model_name_or_path=stable_diffusion_v15_url,
subfolder='text_encoder'),
tokenizer=stable_diffusion_v15_url,
scheduler=diffusion_scheduler,
test_scheduler=diffusion_scheduler,
data_preprocessor=dict(type='DataPreprocessor'),
motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
dream_booth_lora_cfg=dict(
type='ToonYou',
path=models_path +
'DreamBooth_LoRA/majicmixRealistic_v5Preview.safetensors',
steps=25,
guidance_scale=7.5))
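
The v1 motion-module configs in this commit differ only in `randomness.seed` and the DreamBooth LoRA checkpoint path, so a new style can be tried by overriding those fields instead of copying a file. A sketch, assuming mmengine's attribute-style config access (the checkpoint filename below is hypothetical):

from mmengine import Config

cfg = Config.fromfile('configs/animatediff/animatediff_MajicMix.py')
# Swap in a different personalized checkpoint (hypothetical filename).
cfg.model.dream_booth_lora_cfg.path = (
    cfg.models_path + 'DreamBooth_LoRA/my_style.safetensors')
cfg.randomness.seed = [42, 43, 44, 45]  # one seed per generated clip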
61 changes: 61 additions & 0 deletions configs/animatediff/animatediff_RcnzCartoon.py
@@ -0,0 +1,61 @@
# config for model
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
models_path = '/home/AnimateDiff/models/'
randomness = dict(
seed=[
16931037867122267877, 2094308009433392066, 4292543217695451092,
15572665120852309890
],
diff_rank_seed=True)

diffusion_scheduler = dict(
type='DDIMScheduler',
beta_end=0.012,
beta_schedule='linear',
beta_start=0.00085,
num_train_timesteps=1000,
prediction_type='epsilon',
set_alpha_to_one=True,
clip_sample=False,
thresholding=False,
steps_offset=1)

model = dict(
type='AnimateDiff',
vae=dict(
type='AutoencoderKL',
from_pretrained=stable_diffusion_v15_url,
subfolder='vae'),
unet=dict(
type='UNet3DConditionMotionModel',
unet_use_cross_frame_attention=False,
unet_use_temporal_attention=False,
use_motion_module=True,
motion_module_resolutions=[1, 2, 4, 8],
motion_module_mid_block=False,
motion_module_decoder_only=False,
motion_module_type='Vanilla',
motion_module_kwargs=dict(
num_attention_heads=8,
num_transformer_block=1,
attention_block_types=['Temporal_Self', 'Temporal_Self'],
temporal_position_encoding=True,
temporal_position_encoding_max_len=24,
temporal_attention_dim_div=1),
subfolder='unet',
from_pretrained=stable_diffusion_v15_url),
text_encoder=dict(
type='ClipWrapper',
clip_type='huggingface',
pretrained_model_name_or_path=stable_diffusion_v15_url,
subfolder='text_encoder'),
tokenizer=stable_diffusion_v15_url,
scheduler=diffusion_scheduler,
test_scheduler=diffusion_scheduler,
data_preprocessor=dict(type='DataPreprocessor'),
motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
dream_booth_lora_cfg=dict(
type='ToonYou',
path=models_path + 'DreamBooth_LoRA/rcnzCartoon3d_v10.safetensors',
steps=25,
guidance_scale=7.5))
62 changes: 62 additions & 0 deletions configs/animatediff/animatediff_RealisticVision.py
@@ -0,0 +1,62 @@
# config for model
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
models_path = '/home/AnimateDiff/models/'
randomness = dict(
seed=[
5658137986800322009, 12099779162349365895, 10499524853910852697,
16768009035333711932
],
diff_rank_seed=True)

diffusion_scheduler = dict(
type='DDIMScheduler',
beta_end=0.012,
beta_schedule='linear',
beta_start=0.00085,
num_train_timesteps=1000,
prediction_type='epsilon',
set_alpha_to_one=True,
clip_sample=False,
thresholding=False,
steps_offset=1)

model = dict(
type='AnimateDiff',
vae=dict(
type='AutoencoderKL',
from_pretrained=stable_diffusion_v15_url,
subfolder='vae'),
unet=dict(
type='UNet3DConditionMotionModel',
unet_use_cross_frame_attention=False,
unet_use_temporal_attention=False,
use_motion_module=True,
motion_module_resolutions=[1, 2, 4, 8],
motion_module_mid_block=False,
motion_module_decoder_only=False,
motion_module_type='Vanilla',
motion_module_kwargs=dict(
num_attention_heads=8,
num_transformer_block=1,
attention_block_types=['Temporal_Self', 'Temporal_Self'],
temporal_position_encoding=True,
temporal_position_encoding_max_len=24,
temporal_attention_dim_div=1),
subfolder='unet',
from_pretrained=stable_diffusion_v15_url),
text_encoder=dict(
type='ClipWrapper',
clip_type='huggingface',
pretrained_model_name_or_path=stable_diffusion_v15_url,
subfolder='text_encoder'),
tokenizer=stable_diffusion_v15_url,
scheduler=diffusion_scheduler,
test_scheduler=diffusion_scheduler,
data_preprocessor=dict(type='DataPreprocessor'),
motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
dream_booth_lora_cfg=dict(
type='ToonYou',
path=models_path +
'DreamBooth_LoRA/realisticVisionV20_v20.safetensors',
steps=25,
guidance_scale=7.5))
64 changes: 64 additions & 0 deletions configs/animatediff/animatediff_RealisticVision_v2.py
@@ -0,0 +1,64 @@
# config for model
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
models_path = '/home/AnimateDiff/models/'
randomness = dict(
seed=[
13100322578370451493, 14752961627088720670, 9329399085567825781,
16987697414827649302
],
diff_rank_seed=True)

diffusion_scheduler = dict(
type='DDIMScheduler',
beta_end=0.012,
beta_schedule='linear',
beta_start=0.00085,
num_train_timesteps=1000,
prediction_type='epsilon',
set_alpha_to_one=True,
clip_sample=False,
thresholding=False,
steps_offset=1)

model = dict(
type='AnimateDiff',
vae=dict(
type='AutoencoderKL',
from_pretrained=stable_diffusion_v15_url,
subfolder='vae'),
unet=dict(
type='UNet3DConditionMotionModel',
use_inflated_groupnorm=True,
unet_use_cross_frame_attention=False,
unet_use_temporal_attention=False,
use_motion_module=True,
motion_module_resolutions=[1, 2, 4, 8],
motion_module_mid_block=True,
motion_module_decoder_only=False,
motion_module_type='Vanilla',
motion_module_kwargs=dict(
num_attention_heads=8,
num_transformer_block=1,
attention_block_types=['Temporal_Self', 'Temporal_Self'],
temporal_position_encoding=True,
temporal_position_encoding_max_len=32,
temporal_attention_dim_div=1),
subfolder='unet',
from_pretrained=stable_diffusion_v15_url),
text_encoder=dict(
type='ClipWrapper',
clip_type='huggingface',
pretrained_model_name_or_path=stable_diffusion_v15_url,
subfolder='text_encoder'),
tokenizer=stable_diffusion_v15_url,
scheduler=diffusion_scheduler,
test_scheduler=diffusion_scheduler,
data_preprocessor=dict(type='DataPreprocessor'),
motion_module_cfg=dict(path=models_path +
'Motion_Module/mm_sd_v15_v2.ckpt'),
dream_booth_lora_cfg=dict(
type='ToonYou',
path=models_path +
'DreamBooth_LoRA/realisticVisionV20_v20.safetensors',
steps=25,
guidance_scale=7.5))
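
Relative to the v1 configs, this variant changes only the motion-module checkpoint (mm_sd_v15_v2.ckpt) and three UNet settings. A sketch of expressing those deltas as an override of the v1 config, assuming mmengine's `Config.merge_from_dict`:

from mmengine import Config

cfg = Config.fromfile('configs/animatediff/animatediff_RealisticVision.py')
cfg.merge_from_dict(
    dict(
        model=dict(
            unet=dict(
                use_inflated_groupnorm=True,   # v2 UNet: inflated GroupNorm
                motion_module_mid_block=True,  # v2 adds motion layers to the mid block
                motion_module_kwargs=dict(
                    # longer temporal positional encoding (24 -> 32)
                    temporal_position_encoding_max_len=32)),
            motion_module_cfg=dict(
                path=cfg.models_path + 'Motion_Module/mm_sd_v15_v2.ckpt'))))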
80 changes: 80 additions & 0 deletions configs/animatediff/animatediff_ToonYou.py
@@ -0,0 +1,80 @@
# config for model
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
models_path = '/home/AnimateDiff/models/'
randomness = dict(
seed=[
10788741199826055526, 6520604954829636163, 6519455744612555650,
16372571278361863751
],
diff_rank_seed=True)

val_prompts = [
'best quality, masterpiece, 1girl, looking at viewer,\
blurry background, upper body, contemporary, dress',
'masterpiece, best quality, 1girl, solo, cherry blossoms,\
hanami, pink flower, white flower, spring season, wisteria,\
petals, flower, plum blossoms, outdoors, falling petals,\
white hair, black eyes,',
'best quality, masterpiece, 1boy, formal, abstract,\
looking at viewer, masculine, marble pattern',
'best quality, masterpiece, 1girl, cloudy sky,\
dandelion, contrapposto, alternate hairstyle,'
]
val_neg_prompts = [
'',
'badhandv4,easynegative,ng_deepnegative_v1_75t,verybadimagenegative_v1.3,\
bad-artist, bad_prompt_version2-neg, teeth',
'',
'',
]
diffusion_scheduler = dict(
type='DDIMScheduler',
beta_end=0.012,
beta_schedule='linear',
beta_start=0.00085,
num_train_timesteps=1000,
prediction_type='epsilon',
set_alpha_to_one=True,
clip_sample=False,
thresholding=False,
steps_offset=1)

model = dict(
type='AnimateDiff',
vae=dict(
type='AutoencoderKL',
from_pretrained=stable_diffusion_v15_url,
subfolder='vae'),
unet=dict(
type='UNet3DConditionMotionModel',
unet_use_cross_frame_attention=False,
unet_use_temporal_attention=False,
use_motion_module=True,
motion_module_resolutions=[1, 2, 4, 8],
motion_module_mid_block=False,
motion_module_decoder_only=False,
motion_module_type='Vanilla',
motion_module_kwargs=dict(
num_attention_heads=8,
num_transformer_block=1,
attention_block_types=['Temporal_Self', 'Temporal_Self'],
temporal_position_encoding=True,
temporal_position_encoding_max_len=24,
temporal_attention_dim_div=1),
subfolder='unet',
from_pretrained=stable_diffusion_v15_url),
text_encoder=dict(
type='ClipWrapper',
clip_type='huggingface',
pretrained_model_name_or_path=stable_diffusion_v15_url,
subfolder='text_encoder'),
tokenizer=stable_diffusion_v15_url,
scheduler=diffusion_scheduler,
test_scheduler=diffusion_scheduler,
data_preprocessor=dict(type='DataPreprocessor'),
motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
dream_booth_lora_cfg=dict(
type='ToonYou',
path=models_path + 'DreamBooth_LoRA/toonyou_beta3.safetensors',
steps=25,
guidance_scale=7.5))
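
This config also carries validation prompts; below is a sketch of a prompt loop over them. The `infer` call and its output key are assumptions, not an API confirmed by this diff — see configs/animatediff/README.md for the supported entry point:

import imageio
from mmengine import Config
from mmagic.registry import MODELS
from mmagic.utils import register_all_modules

register_all_modules()
cfg = Config.fromfile('configs/animatediff/animatediff_ToonYou.py')
animatediff = MODELS.build(cfg.model).cuda().eval()

for i, (prompt, neg) in enumerate(zip(cfg.val_prompts, cfg.val_neg_prompts)):
    out = animatediff.infer(     # assumed entry point and signature
        prompt,
        negative_prompt=neg,
        video_length=16,         # frames per clip
        height=512,
        width=512,
        seed=cfg.randomness.seed[i])
    # Newer imageio versions take `duration` instead of `fps`,
    # as noted in the commit history above.
    imageio.mimsave(f'sample_{i}.gif', out['samples'], fps=8)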