diff --git a/Dockerfile b/Dockerfile
index ebb00632..a72642d7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a parent image
-FROM python:3.10-slim-bullseye
+FROM python:3.11-slim-bullseye
 
 # Set the working directory in the container
 WORKDIR /MoneyPrinterTurbo
diff --git a/README-en.md b/README-en.md
index 20df1330..6a423a12 100644
--- a/README-en.md
+++ b/README-en.md
@@ -172,7 +172,7 @@ using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index
 ```shell
 git clone https://github.com/harry0703/MoneyPrinterTurbo.git
 cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.10
+conda create -n MoneyPrinterTurbo python=3.11
 conda activate MoneyPrinterTurbo
 pip install -r requirements.txt
 ```
diff --git a/README.md b/README.md
index dafdedb2..36fe2700 100644
--- a/README.md
+++ b/README.md
@@ -193,7 +193,7 @@ docker-compose up
 ```shell
 git clone https://github.com/harry0703/MoneyPrinterTurbo.git
 cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.10
+conda create -n MoneyPrinterTurbo python=3.11
 conda activate MoneyPrinterTurbo
 pip install -r requirements.txt
 ```
diff --git a/app/models/schema.py b/app/models/schema.py
index 2a77baf3..530ea720 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -1,6 +1,6 @@
 import warnings
 from enum import Enum
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Union
 
 import pydantic
 from pydantic import BaseModel
@@ -122,7 +122,7 @@ class VideoParams(BaseModel):
     custom_position: float = 70.0
     font_name: Optional[str] = "STHeitiMedium.ttc"
     text_fore_color: Optional[str] = "#FFFFFF"
-    text_background_color: Optional[str] = "transparent"
+    text_background_color: Union[bool, str] = True
     font_size: int = 60
     stroke_color: Optional[str] = "#000000"
@@ -143,7 +143,7 @@ class SubtitleRequest(BaseModel):
     subtitle_position: Optional[str] = "bottom"
     font_name: Optional[str] = "STHeitiMedium.ttc"
     text_fore_color: Optional[str] = "#FFFFFF"
-    text_background_color: Optional[str] = "transparent"
+    text_background_color: Union[bool, str] = True
     font_size: int = 60
     stroke_color: Optional[str] = "#000000"
     stroke_width: float = 1.5
diff --git a/app/services/video.py b/app/services/video.py
index a1070063..afa77670 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -1,9 +1,10 @@
 import glob
+import os
 import random
 from typing import List
 
 from loguru import logger
-from moviepy.editor import *
+from moviepy import *
 from moviepy.video.tools.subtitles import SubtitlesClip
 from PIL import ImageFont
 
@@ -60,7 +61,7 @@ def combine_videos(
         while start_time < clip_duration:
             end_time = min(start_time + max_clip_duration, clip_duration)
-            split_clip = clip.subclip(start_time, end_time)
+            split_clip = clip.subclipped(start_time, end_time)
             raw_clips.append(split_clip)
             # logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}")
             start_time = end_time
@@ -76,11 +77,11 @@ def combine_videos(
     for clip in raw_clips:
         # Check if clip is longer than the remaining audio
         if (audio_duration - video_duration) < clip.duration:
-            clip = clip.subclip(0, (audio_duration - video_duration))
+            clip = clip.subclipped(0, (audio_duration - video_duration))
         # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
         elif req_dur < clip.duration:
-            clip = clip.subclip(0, req_dur)
-            clip = clip.set_fps(30)
+            clip = clip.subclipped(0, req_dur)
+            clip = clip.with_fps(30)
 
         # Not all videos are same size, so we need to resize them
         clip_w, clip_h = clip.size
@@ -90,7 +91,7 @@ def combine_videos(
         if clip_ratio == video_ratio:
             # scale proportionally
-            clip = clip.resize((video_width, video_height))
+            clip = clip.resized((video_width, video_height))
         else:
             # scale the video proportionally
             if clip_ratio > video_ratio:
@@ -102,15 +103,15 @@ def combine_videos(
             new_width = int(clip_w * scale_factor)
             new_height = int(clip_h * scale_factor)
-            clip_resized = clip.resize(newsize=(new_width, new_height))
+            clip_resized = clip.resized(new_size=(new_width, new_height))
 
             background = ColorClip(
                 size=(video_width, video_height), color=(0, 0, 0)
             )
             clip = CompositeVideoClip(
                 [
-                    background.set_duration(clip.duration),
-                    clip_resized.set_position("center"),
+                    background.with_duration(clip.duration),
+                    clip_resized.with_position("center"),
                 ]
             )
@@ -119,13 +120,13 @@ def combine_videos(
         )
 
         if clip.duration > max_clip_duration:
-            clip = clip.subclip(0, max_clip_duration)
+            clip = clip.subclipped(0, max_clip_duration)
         clips.append(clip)
         video_duration += clip.duration
 
     video_clip = concatenate_videoclips(clips)
-    video_clip = video_clip.set_fps(30)
+    video_clip = video_clip.with_fps(30)
     logger.info("writing")
     # https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
     video_clip.write_videofile(
@@ -231,29 +232,30 @@ def generate_video(
     logger.info(f"using font: {font_path}")
 
     def create_text_clip(subtitle_item):
+        params.font_size = int(params.font_size)
+        params.stroke_width = int(params.stroke_width)
         phrase = subtitle_item[1]
         max_width = video_width * 0.9
         wrapped_txt, txt_height = wrap_text(
             phrase, max_width=max_width, font=font_path, fontsize=params.font_size
         )
         _clip = TextClip(
-            wrapped_txt,
+            text=wrapped_txt,
             font=font_path,
-            fontsize=params.font_size,
+            font_size=params.font_size,
             color=params.text_fore_color,
             bg_color=params.text_background_color,
             stroke_color=params.stroke_color,
             stroke_width=params.stroke_width,
-            print_cmd=False,
         )
         duration = subtitle_item[0][1] - subtitle_item[0][0]
-        _clip = _clip.set_start(subtitle_item[0][0])
-        _clip = _clip.set_end(subtitle_item[0][1])
-        _clip = _clip.set_duration(duration)
+        _clip = _clip.with_start(subtitle_item[0][0])
+        _clip = _clip.with_end(subtitle_item[0][1])
+        _clip = _clip.with_duration(duration)
         if params.subtitle_position == "bottom":
-            _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
+            _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
         elif params.subtitle_position == "top":
-            _clip = _clip.set_position(("center", video_height * 0.05))
+            _clip = _clip.with_position(("center", video_height * 0.05))
         elif params.subtitle_position == "custom":
             # make sure the subtitle stays fully on screen
             margin = 10  # extra margin, in pixels
@@ -261,16 +263,25 @@ def create_text_clip(subtitle_item):
             min_y = margin
             custom_y = (video_height - _clip.h) * (params.custom_position / 100)
             custom_y = max(min_y, min(custom_y, max_y))  # clamp y to the valid range
-            _clip = _clip.set_position(("center", custom_y))
+            _clip = _clip.with_position(("center", custom_y))
         else:  # center
-            _clip = _clip.set_position(("center", "center"))
+            _clip = _clip.with_position(("center", "center"))
         return _clip
 
     video_clip = VideoFileClip(video_path)
-    audio_clip = AudioFileClip(audio_path).volumex(params.voice_volume)
+    audio_clip = AudioFileClip(audio_path).with_effects(
+        [afx.MultiplyVolume(params.voice_volume)]
+    )
 
     if subtitle_path and os.path.exists(subtitle_path):
-        sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
+        generator = lambda text: TextClip(
+            text=text,
+            font=font_path,
+            font_size=params.font_size,
+        )
+        sub = SubtitlesClip(
+            subtitles=subtitle_path, encoding="utf-8", make_textclip=generator
+        )
         text_clips = []
         for item in sub.subtitles:
             clip = create_text_clip(subtitle_item=item)
@@ -280,15 +291,18 @@ def create_text_clip(subtitle_item):
     bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
     if bgm_file:
         try:
-            bgm_clip = (
-                AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
+            bgm_clip = AudioFileClip(bgm_file).with_effects(
+                [
+                    afx.MultiplyVolume(params.bgm_volume),
+                    afx.AudioFadeOut(3),
+                    afx.AudioLoop(duration=video_clip.duration),
+                ]
             )
-            bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
             audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
         except Exception as e:
             logger.error(f"failed to add bgm: {str(e)}")
 
-    video_clip = video_clip.set_audio(audio_clip)
+    video_clip = video_clip.with_audio(audio_clip)
     video_clip.write_videofile(
         output_file,
         audio_codec="aac",
@@ -324,14 +338,14 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
             # create an image clip and set its duration to 3 seconds
             clip = (
                 ImageClip(material.url)
-                .set_duration(clip_duration)
-                .set_position("center")
+                .with_duration(clip_duration)
+                .with_position("center")
             )
             # use the resize method to add a zoom effect; the lambda makes the scale vary over time
             # here we zoom gradually from the original size up to 120%
             # t is the current time, clip.duration is the total duration (3 seconds here)
             # note: 1 means 100% size, so 1.2 means 120%
-            zoom_clip = clip.resize(
+            zoom_clip = clip.resized(
                 lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
             )
diff --git a/app/services/voice.py b/app/services/voice.py
index 287e22d7..8a8c89f1 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -302,21 +302,33 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: en-US-AnaNeural
 Gender: Female
 
+Name: en-US-AndrewMultilingualNeural
+Gender: Male
+
 Name: en-US-AndrewNeural
 Gender: Male
 
 Name: en-US-AriaNeural
 Gender: Female
 
+Name: en-US-AvaMultilingualNeural
+Gender: Female
+
 Name: en-US-AvaNeural
 Gender: Female
 
+Name: en-US-BrianMultilingualNeural
+Gender: Male
+
 Name: en-US-BrianNeural
 Gender: Male
 
 Name: en-US-ChristopherNeural
 Gender: Male
 
+Name: en-US-EmmaMultilingualNeural
+Gender: Female
+
 Name: en-US-EmmaNeural
 Gender: Female
@@ -602,12 +614,24 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: it-IT-ElsaNeural
 Gender: Female
 
-Name: it-IT-GiuseppeNeural
+Name: it-IT-GiuseppeMultilingualNeural
 Gender: Male
 
 Name: it-IT-IsabellaNeural
 Gender: Female
 
+Name: iu-Cans-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Cans-CA-TaqqiqNeural
+Gender: Male
+
+Name: iu-Latn-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Latn-CA-TaqqiqNeural
+Gender: Male
+
 Name: ja-JP-KeitaNeural
 Gender: Male
@@ -644,7 +668,7 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: kn-IN-SapnaNeural
 Gender: Female
 
-Name: ko-KR-HyunsuNeural
+Name: ko-KR-HyunsuMultilingualNeural
 Gender: Male
 
 Name: ko-KR-InJoonNeural
@@ -758,7 +782,7 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: pt-BR-FranciscaNeural
 Gender: Female
 
-Name: pt-BR-ThalitaNeural
+Name: pt-BR-ThalitaMultilingualNeural
 Gender: Female
 
 Name: pt-PT-DuarteNeural
diff --git a/docs/api.jpg b/docs/api.jpg
index e9a41225..769dc796 100644
Binary files a/docs/api.jpg and b/docs/api.jpg differ
diff --git a/docs/webui-en.jpg b/docs/webui-en.jpg
index d68245cd..aa4c735a 100644
Binary files a/docs/webui-en.jpg and b/docs/webui-en.jpg differ
diff --git a/docs/webui.jpg b/docs/webui.jpg
index 387102f3..4ddea7a9 100644
Binary files a/docs/webui.jpg and b/docs/webui.jpg differ
diff --git a/requirements.txt b/requirements.txt
index 25da97c0..3cc84abd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,26 +1,14 @@
-requests~=2.31.0
-moviepy==2.0.0.dev2
-openai~=1.13.3
-faster-whisper~=1.0.1
-edge_tts~=6.1.10
-uvicorn~=0.27.1
-fastapi~=0.110.0
-tomli~=2.0.1
-streamlit~=1.33.0
-loguru~=0.7.2
-aiohttp~=3.9.3
-urllib3~=2.2.1
-pillow~=10.3.0
-pydantic~=2.6.3
-g4f~=0.3.0.4
-dashscope~=1.15.0
-google.generativeai~=0.4.1
-python-multipart~=0.0.9
-redis==5.0.3
-# if you use pillow~=10.3.0, you will get "PIL.Image' has no attribute 'ANTIALIAS'" error when resize video
-# please install opencv-python to fix "PIL.Image' has no attribute 'ANTIALIAS'" error
-opencv-python~=4.9.0.80
-# for azure speech
-# https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
-azure-cognitiveservices-speech~=1.37.0
-git-changelog~=2.5.2
+moviepy==2.1.1
+streamlit==1.40.2
+edge_tts==6.1.19
+fastapi==0.115.6
+uvicorn==0.32.1
+openai==1.56.1
+faster-whisper==1.1.0
+loguru==0.7.2
+google.generativeai==0.8.3
+dashscope==1.20.14
+g4f==0.3.8.1
+azure-cognitiveservices-speech==1.41.1
+redis==5.2.0
+python-multipart==0.0.19
\ No newline at end of file
diff --git a/webui/Main.py b/webui/Main.py
index d3138f78..a60431e1 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -479,7 +479,7 @@ def save_keys_to_config(cfg_key, value):
         st.session_state["video_terms"] = ", ".join(terms)
 
     params.video_terms = st.text_area(
-        tr("Video Keywords"), value=st.session_state["video_terms"], height=50
+        tr("Video Keywords"), value=st.session_state["video_terms"]
     )
 
 with middle_panel:
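
The core of this patch is the MoviePy 1.x to 2.x migration: the `moviepy.editor` entry point is gone (plain `from moviepy import *` replaces it), and the mutating-style methods are renamed (`subclip` → `subclipped`, `resize` → `resized`, every `set_*` → `with_*`), with each call returning a new clip. A minimal sketch of the rename pattern; `input.mp4` is a placeholder path, not a file from this repo:

```python
from moviepy import ColorClip, CompositeVideoClip, VideoFileClip

clip = VideoFileClip("input.mp4")   # placeholder path
clip = clip.subclipped(0, 5)        # 1.x: clip.subclip(0, 5)
clip = clip.resized((1080, 1920))   # 1.x: clip.resize(...)
clip = clip.with_fps(30)            # 1.x: clip.set_fps(30)

# Letterbox onto a black background, as combine_videos does above.
background = ColorClip(size=(1080, 1920), color=(0, 0, 0))
composite = CompositeVideoClip(
    [
        background.with_duration(clip.duration),  # 1.x: set_duration
        clip.with_position("center"),             # 1.x: set_position
    ]
)
```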
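Audio follows the same pattern: `volumex`, `audio_fadeout`, and `afx.audio_loop` are replaced by effect classes passed to `with_effects`. A sketch under assumed inputs (`voice.mp3`, `bgm.mp3`, and both volume factors are placeholders); note that the background track must be scaled by the BGM volume, which is why the bgm hunk above uses `params.bgm_volume`:

```python
from moviepy import AudioFileClip, CompositeAudioClip, afx

voice = AudioFileClip("voice.mp3").with_effects(  # placeholder path
    [afx.MultiplyVolume(1.0)]                     # 1.x: .volumex(1.0)
)
bgm = AudioFileClip("bgm.mp3").with_effects(
    [
        afx.MultiplyVolume(0.2),                 # scale by the *BGM* volume
        afx.AudioFadeOut(3),                     # 1.x: .audio_fadeout(3)
        afx.AudioLoop(duration=voice.duration),  # 1.x: afx.audio_loop(...)
    ]
)
mixed = CompositeAudioClip([voice, bgm])
```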
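`TextClip` in 2.x takes keyword arguments (`text=` and `font_size=` instead of positional text and `fontsize=`) and drops `print_cmd`, while `SubtitlesClip` accepts a `make_textclip` callback to render each cue, which is what the `generator` lambda above supplies. A sketch assuming a placeholder font file and subtitle path:

```python
from moviepy import TextClip
from moviepy.video.tools.subtitles import SubtitlesClip

FONT = "STHeitiMedium.ttc"  # placeholder: 2.x expects a font *file* path


def make_textclip(text):
    # font_size and stroke_width must be ints in 2.x, which is why
    # create_text_clip above adds the int() casts.
    return TextClip(text=text, font=FONT, font_size=60)


sub = SubtitlesClip(
    subtitles="subtitle.srt",  # placeholder .srt path
    encoding="utf-8",
    make_textclip=make_textclip,
)
```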
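The schema change widens `text_background_color` from the `"transparent"` string to `Union[bool, str]`, so the field can carry either a boolean toggle (`True` meaning "draw a background") or an explicit color string. A minimal pydantic sketch; `SubtitleStyle` is a made-up model name for illustration:

```python
from typing import Union

from pydantic import BaseModel


class SubtitleStyle(BaseModel):
    # True -> draw a background, False -> none, or an explicit color string,
    # mirroring the VideoParams/SubtitleRequest fields above
    text_background_color: Union[bool, str] = True


print(SubtitleStyle().text_background_color)                    # True
print(SubtitleStyle(text_background_color="#000000").text_background_color)  # #000000
```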