-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Support AnimateDiff, a popular text2animation method (#1980)
* first commit for animatediff * fix lint errors * modify readme file and add readme_zh-CN.md * fix some typos in readme * delete test_animatediff.py * add some docstring * fix cross attention for 512*512 animation quality * fix some initial setting for cpu load * add unittest samples * modify unittest codes * remove duplicated unittest files * modify unittest codes for minimum memory * modify test_unet3d resolution for minimum memory unittest * modify test_unet_blocks3d input resolution for minimum memory unittest * modify animatediff.py for gradio * add gradio app for animatediff * skip test with large memory * fix environment building * fix merging conflict * Add different style ckpt * fix environment building * add new motion module * add prompts for all config files in README * add image in README * fix sd ckpt auto downloading * remove unused import in test code * align README_zh and README * fix building error * delete unused comments * fix test memory * fix text_model error for later transformer version * fix comment copyright * add animatediff gradio README * modify some copyright in motion_module.py * modify README for better test guidance * fix inference without xformers and mimsave for higher version of imageio * fix errors in different versions of imageio * add train tutorial and pretrained models * fix some comments in README * delete personal information * fix gradio sd selection * add some tips for run gradio * add pretrained links --------- Co-authored-by: rangoliu <[email protected]>
- Loading branch information
Showing
27 changed files
with
6,518 additions
and
1 deletion.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Model config: AnimateDiff with the `lyriel_v16` DreamBooth/LoRA checkpoint
# and the `mm_sd_v14` motion module.

# Pretrained model identifier handed to `from_pretrained` /
# `pretrained_model_name_or_path` for the VAE, UNet, text encoder and
# tokenizer below.
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
# Root directory of the local checkpoints. Paths below are built with `+`,
# so the trailing slash is required.
models_path = '/home/AnimateDiff/models/'
# Fixed seeds. The values are unsigned 64-bit integers (some exceed
# 2**63 - 1).
# NOTE(review): presumably one seed per generated sample -- confirm against
# the code that consumes `randomness`.
randomness = dict(
    seed=[
        10917152860782582783, 6399018107401806238, 15875751942533906793,
        6653196880059936551
    ],
    diff_rank_seed=True)

# Scheduler settings; referenced by both `scheduler` and `test_scheduler`
# in `model`.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    unet=dict(
        type='UNet3DConditionMotionModel',
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=False,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=24,
            temporal_attention_dim_div=1),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
    # NOTE(review): `type` says 'ToonYou' although the checkpoint is
    # lyriel_v16 -- confirm whether the consumer reads this field before
    # renaming it.
    dream_booth_lora_cfg=dict(
        type='ToonYou',
        path=models_path + 'DreamBooth_LoRA/lyriel_v16.safetensors',
        steps=25,
        guidance_scale=7.5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# Model config: AnimateDiff with the `majicmixRealistic_v5Preview`
# DreamBooth/LoRA checkpoint and the `mm_sd_v14` motion module.

# Pretrained model identifier handed to `from_pretrained` /
# `pretrained_model_name_or_path` for the VAE, UNet, text encoder and
# tokenizer below.
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
# Root directory of the local checkpoints. Paths below are built with `+`,
# so the trailing slash is required.
models_path = '/home/AnimateDiff/models/'
# Fixed seeds. The values are unsigned 64-bit integers (some exceed
# 2**63 - 1).
# NOTE(review): presumably one seed per generated sample -- confirm against
# the code that consumes `randomness`.
randomness = dict(
    seed=[
        1572448948722921032, 1099474677988590681, 6488833139725635347,
        18339859844376517918
    ],
    diff_rank_seed=True)

# Scheduler settings; referenced by both `scheduler` and `test_scheduler`
# in `model`.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    unet=dict(
        type='UNet3DConditionMotionModel',
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=False,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=24,
            temporal_attention_dim_div=1),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
    # NOTE(review): `type` says 'ToonYou' although the checkpoint is
    # majicmixRealistic_v5Preview -- confirm whether the consumer reads this
    # field before renaming it.
    dream_booth_lora_cfg=dict(
        type='ToonYou',
        path=models_path +
        'DreamBooth_LoRA/majicmixRealistic_v5Preview.safetensors',
        steps=25,
        guidance_scale=7.5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Model config: AnimateDiff with the `rcnzCartoon3d_v10` DreamBooth/LoRA
# checkpoint and the `mm_sd_v14` motion module.

# Pretrained model identifier handed to `from_pretrained` /
# `pretrained_model_name_or_path` for the VAE, UNet, text encoder and
# tokenizer below.
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
# Root directory of the local checkpoints. Paths below are built with `+`,
# so the trailing slash is required.
models_path = '/home/AnimateDiff/models/'
# Fixed seeds. The values are unsigned 64-bit integers (some exceed
# 2**63 - 1).
# NOTE(review): presumably one seed per generated sample -- confirm against
# the code that consumes `randomness`.
randomness = dict(
    seed=[
        16931037867122267877, 2094308009433392066, 4292543217695451092,
        15572665120852309890
    ],
    diff_rank_seed=True)

# Scheduler settings; referenced by both `scheduler` and `test_scheduler`
# in `model`.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    unet=dict(
        type='UNet3DConditionMotionModel',
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=False,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=24,
            temporal_attention_dim_div=1),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
    # NOTE(review): `type` says 'ToonYou' although the checkpoint is
    # rcnzCartoon3d_v10 -- confirm whether the consumer reads this field
    # before renaming it.
    dream_booth_lora_cfg=dict(
        type='ToonYou',
        path=models_path + 'DreamBooth_LoRA/rcnzCartoon3d_v10.safetensors',
        steps=25,
        guidance_scale=7.5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# Model config: AnimateDiff with the `realisticVisionV20_v20` DreamBooth/LoRA
# checkpoint and the `mm_sd_v14` motion module.

# Pretrained model identifier handed to `from_pretrained` /
# `pretrained_model_name_or_path` for the VAE, UNet, text encoder and
# tokenizer below.
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
# Root directory of the local checkpoints. Paths below are built with `+`,
# so the trailing slash is required.
models_path = '/home/AnimateDiff/models/'
# Fixed seeds. The values are unsigned 64-bit integers (some exceed
# 2**63 - 1).
# NOTE(review): presumably one seed per generated sample -- confirm against
# the code that consumes `randomness`.
randomness = dict(
    seed=[
        5658137986800322009, 12099779162349365895, 10499524853910852697,
        16768009035333711932
    ],
    diff_rank_seed=True)

# Scheduler settings; referenced by both `scheduler` and `test_scheduler`
# in `model`.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    unet=dict(
        type='UNet3DConditionMotionModel',
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=False,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=24,
            temporal_attention_dim_div=1),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
    # NOTE(review): `type` says 'ToonYou' although the checkpoint is
    # realisticVisionV20_v20 -- confirm whether the consumer reads this
    # field before renaming it.
    dream_booth_lora_cfg=dict(
        type='ToonYou',
        path=models_path +
        'DreamBooth_LoRA/realisticVisionV20_v20.safetensors',
        steps=25,
        guidance_scale=7.5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Model config: AnimateDiff with the `realisticVisionV20_v20` DreamBooth/LoRA
# checkpoint and the `mm_sd_v15_v2` motion module.

# Pretrained model identifier handed to `from_pretrained` /
# `pretrained_model_name_or_path` for the VAE, UNet, text encoder and
# tokenizer below.
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
# Root directory of the local checkpoints. Paths below are built with `+`,
# so the trailing slash is required.
models_path = '/home/AnimateDiff/models/'
# Fixed seeds. The values are unsigned 64-bit integers (all four exceed
# 2**63 - 1).
# NOTE(review): presumably one seed per generated sample -- confirm against
# the code that consumes `randomness`.
randomness = dict(
    seed=[
        13100322578370451493, 14752961627088720670, 9329399085567825781,
        16987697414827649302
    ],
    diff_rank_seed=True)

# Scheduler settings; referenced by both `scheduler` and `test_scheduler`
# in `model`.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    # NOTE(review): `use_inflated_groupnorm`, `motion_module_mid_block=True`
    # and a position-encoding length of 32 presumably have to match the
    # mm_sd_v15_v2 checkpoint loaded below -- confirm before changing them.
    unet=dict(
        type='UNet3DConditionMotionModel',
        use_inflated_groupnorm=True,
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=True,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=32,
            temporal_attention_dim_div=1),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    motion_module_cfg=dict(path=models_path +
                           'Motion_Module/mm_sd_v15_v2.ckpt'),
    # NOTE(review): `type` says 'ToonYou' although the checkpoint is
    # realisticVisionV20_v20 -- confirm whether the consumer reads this
    # field before renaming it.
    dream_booth_lora_cfg=dict(
        type='ToonYou',
        path=models_path +
        'DreamBooth_LoRA/realisticVisionV20_v20.safetensors',
        steps=25,
        guidance_scale=7.5))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Model config: AnimateDiff with the `toonyou_beta3` DreamBooth/LoRA
# checkpoint and the `mm_sd_v14` motion module, plus validation prompts.

# Pretrained model identifier handed to `from_pretrained` /
# `pretrained_model_name_or_path` for the VAE, UNet, text encoder and
# tokenizer below.
stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
# Root directory of the local checkpoints. Paths below are built with `+`,
# so the trailing slash is required.
models_path = '/home/AnimateDiff/models/'
# Fixed seeds. The values are unsigned 64-bit integers (some exceed
# 2**63 - 1).
# NOTE(review): there are four seeds and four prompts below; presumably
# they pair up one-to-one -- confirm against the consumer.
randomness = dict(
    seed=[
        10788741199826055526, 6520604954829636163, 6519455744612555650,
        16372571278361863751
    ],
    diff_rank_seed=True)

# Prompts are written as adjacent string literals (joined by the parser)
# rather than backslash continuations inside one literal, so the prompt text
# no longer depends on how the source lines are indented. Each continuation
# starts after exactly one space.
val_prompts = [
    'best quality, masterpiece, 1girl, looking at viewer, '
    'blurry background, upper body, contemporary, dress',
    'masterpiece, best quality, 1girl, solo, cherry blossoms, '
    'hanami, pink flower, white flower, spring season, wisteria, '
    'petals, flower, plum blossoms, outdoors, falling petals, '
    'white hair, black eyes,',
    'best quality, masterpiece, 1boy, formal, abstract, '
    'looking at viewer, masculine, marble pattern',
    'best quality, masterpiece, 1girl, cloudy sky, '
    'dandelion, contrapposto, alternate hairstyle,',
]
# Negative prompts, index-aligned with `val_prompts`; '' means none.
# NOTE: the name is misspelled ("propmts") but it is a config key that other
# code may look up, so it is kept as-is.
val_neg_propmts = [
    '',
    'badhandv4,easynegative,ng_deepnegative_v1_75t,verybadimagenegative_v1.3,'
    ' bad-artist, bad_prompt_version2-neg, teeth',
    '',
    '',
]

# Scheduler settings; referenced by both `scheduler` and `test_scheduler`
# in `model`.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    unet=dict(
        type='UNet3DConditionMotionModel',
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=False,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=24,
            temporal_attention_dim_div=1),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v14.ckpt'),
    dream_booth_lora_cfg=dict(
        type='ToonYou',
        path=models_path + 'DreamBooth_LoRA/toonyou_beta3.safetensors',
        steps=25,
        guidance_scale=7.5))
Oops, something went wrong.