From ca5ac35790cd9cea8c493f1571933689f979cf2f Mon Sep 17 00:00:00 2001 From: wwbitejotunn Date: Tue, 7 Nov 2023 23:41:05 +0800 Subject: [PATCH 1/3] fix sd deploy --- ppdiffusers/deploy/export_model.py | 4 ++++ ppdiffusers/deploy/infer.py | 21 +++++++++++-------- .../pipeline_fastdeploy_stable_diffusion.py | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/ppdiffusers/deploy/export_model.py b/ppdiffusers/deploy/export_model.py index 230fbd447..583bf6fe0 100644 --- a/ppdiffusers/deploy/export_model.py +++ b/ppdiffusers/deploy/export_model.py @@ -42,6 +42,10 @@ def convert_ppdiffusers_pipeline_to_fastdeploy_pipeline( pipeline = StableDiffusionPipeline.from_pretrained( model_path, unet=unet_model, safety_checker=None, feature_extractor=None ) + # make sure we disable xformers + pipeline.disable_xformers_memory_efficient_attention() + pipeline.unet.set_default_attn_processor() + pipeline.vae.set_default_attn_processor() output_path = Path(output_path) # calculate latent's H and W latent_height = height // 8 if height is not None else None diff --git a/ppdiffusers/deploy/infer.py b/ppdiffusers/deploy/infer.py index 3b3e1bb63..efc45463b 100644 --- a/ppdiffusers/deploy/infer.py +++ b/ppdiffusers/deploy/infer.py @@ -160,11 +160,12 @@ def create_paddle_inference_runtime( disable_paddle_trt_ops=[], disable_paddle_pass=[], paddle_stream=None, - workspace=None, + workspace=(2<<31) * 4, ): assert not use_fp16 or not use_bf16, "use_fp16 and use_bf16 are mutually exclusive" option = fd.RuntimeOption() option.use_paddle_backend() + # option.enable_paddle_log_info() if device_id == -1: option.use_cpu() else: @@ -178,6 +179,7 @@ def create_paddle_inference_runtime( if use_trt: option.paddle_infer_option.disable_trt_ops(disable_paddle_trt_ops) option.paddle_infer_option.enable_trt = True + print(f"##### workspace {workspace}") if workspace is not None: option.set_trt_max_workspace_size(workspace) if use_fp16: @@ -378,6 +380,15 @@ def main(args): elif args.backend == "paddle" or args.backend == "paddle_tensorrt": args.use_trt = args.backend == "paddle_tensorrt" runtime_options = dict( + unet=create_paddle_inference_runtime( + use_trt=args.use_trt, + dynamic_shape=unet_dynamic_shape, + use_fp16=args.use_fp16, + use_bf16=args.use_bf16, + device_id=args.device_id, + paddle_stream=paddle_stream, + workspace=20*1024*1024*1024 + ), text_encoder=create_paddle_inference_runtime( use_trt=args.use_trt, dynamic_shape=text_encoder_dynamic_shape, @@ -403,14 +414,6 @@ def main(args): device_id=args.device_id, paddle_stream=paddle_stream, ), - unet=create_paddle_inference_runtime( - use_trt=args.use_trt, - dynamic_shape=unet_dynamic_shape, - use_fp16=args.use_fp16, - use_bf16=args.use_bf16, - device_id=args.device_id, - paddle_stream=paddle_stream, - ), ) pipe = FastDeployStableDiffusionMegaPipeline.from_pretrained( args.model_dir, diff --git a/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py b/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py index 319346858..a29bb65ee 100644 --- a/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py +++ b/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py @@ -272,7 +272,7 @@ def __call__( unet_inputs = dict( sample=latent_model_input, - timestep=t, + timestep=paddle.to_tensor(t).reshape([1,]), encoder_hidden_states=prompt_embeds, infer_op=infer_op_dict.get("unet", None), output_shape=latent_model_input.shape, From c5d9c39169790b05ad4eb85f67df858a3512757b Mon Sep 17 00:00:00 2001 From: wwbitejotunn Date: Fri, 10 Nov 2023 16:52:22 +0800 Subject: [PATCH 2/3] adjust trt workspace --- ppdiffusers/deploy/infer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ppdiffusers/deploy/infer.py b/ppdiffusers/deploy/infer.py index efc45463b..c47c84bd1 100644 --- a/ppdiffusers/deploy/infer.py +++ b/ppdiffusers/deploy/infer.py @@ -160,12 +160,14 @@ def create_paddle_inference_runtime( disable_paddle_trt_ops=[], disable_paddle_pass=[], paddle_stream=None, - workspace=(2<<31) * 4, + workspace=None, + show_log=False ): assert not use_fp16 or not use_bf16, "use_fp16 and use_bf16 are mutually exclusive" option = fd.RuntimeOption() option.use_paddle_backend() - # option.enable_paddle_log_info() + if show_log: + option.enable_paddle_log_info() if device_id == -1: option.use_cpu() else: @@ -387,7 +389,7 @@ def main(args): use_bf16=args.use_bf16, device_id=args.device_id, paddle_stream=paddle_stream, - workspace=20*1024*1024*1024 + workspace=10*1024*1024*1024 ), text_encoder=create_paddle_inference_runtime( use_trt=args.use_trt, @@ -397,6 +399,7 @@ def main(args): device_id=args.device_id, disable_paddle_trt_ops=["arg_max", "range", "lookup_table_v2"], paddle_stream=paddle_stream, + workspace=10*1024*1024*1024 ), vae_encoder=create_paddle_inference_runtime( use_trt=args.use_trt, @@ -405,6 +408,7 @@ def main(args): use_bf16=args.use_bf16, device_id=args.device_id, paddle_stream=paddle_stream, + workspace=10*1024*1024*1024 ), vae_decoder=create_paddle_inference_runtime( use_trt=args.use_trt, @@ -413,6 +417,7 @@ def main(args): use_bf16=args.use_bf16, device_id=args.device_id, paddle_stream=paddle_stream, + workspace=10*1024*1024*1024 ), ) pipe = FastDeployStableDiffusionMegaPipeline.from_pretrained( From 0f53de6d168a43a2d27af5ed3d7ee6b61f4f0016 Mon Sep 17 00:00:00 2001 From: wwbitejotunn Date: Fri, 10 Nov 2023 16:56:11 +0800 Subject: [PATCH 3/3] remove log --- ppdiffusers/deploy/infer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ppdiffusers/deploy/infer.py b/ppdiffusers/deploy/infer.py index c47c84bd1..d90dd7165 100644 --- a/ppdiffusers/deploy/infer.py +++ b/ppdiffusers/deploy/infer.py @@ -181,7 +181,6 @@ def create_paddle_inference_runtime( if use_trt: option.paddle_infer_option.disable_trt_ops(disable_paddle_trt_ops) option.paddle_infer_option.enable_trt = True - print(f"##### workspace {workspace}") if workspace is not None: option.set_trt_max_workspace_size(workspace) if use_fp16: