diff --git a/Dockerfile b/Dockerfile
index ebb00632..a72642d7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Use an official Python runtime as a parent image
-FROM python:3.10-slim-bullseye
+FROM python:3.11-slim-bullseye
 
 # Set the working directory in the container
 WORKDIR /MoneyPrinterTurbo
diff --git a/README-en.md b/README-en.md
index 20df1330..6a423a12 100644
--- a/README-en.md
+++ b/README-en.md
@@ -172,7 +172,7 @@ using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index
 ```shell
 git clone https://github.com/harry0703/MoneyPrinterTurbo.git
 cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.10
+conda create -n MoneyPrinterTurbo python=3.11
 conda activate MoneyPrinterTurbo
 pip install -r requirements.txt
 ```
diff --git a/README.md b/README.md
index dafdedb2..36fe2700 100644
--- a/README.md
+++ b/README.md
@@ -193,7 +193,7 @@ docker-compose up
 ```shell
 git clone https://github.com/harry0703/MoneyPrinterTurbo.git
 cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.10
+conda create -n MoneyPrinterTurbo python=3.11
 conda activate MoneyPrinterTurbo
 pip install -r requirements.txt
 ```
diff --git a/app/models/schema.py b/app/models/schema.py
index 2a77baf3..530ea720 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -1,6 +1,6 @@
 import warnings
 from enum import Enum
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Union
 
 import pydantic
 from pydantic import BaseModel
@@ -122,7 +122,7 @@ class VideoParams(BaseModel):
     custom_position: float = 70.0
     font_name: Optional[str] = "STHeitiMedium.ttc"
     text_fore_color: Optional[str] = "#FFFFFF"
-    text_background_color: Optional[str] = "transparent"
+    text_background_color: Union[bool, str] = True
     font_size: int = 60
     stroke_color: Optional[str] = "#000000"
@@ -143,7 +143,7 @@ class SubtitleRequest(BaseModel):
     subtitle_position: Optional[str] = "bottom"
     font_name: Optional[str] = "STHeitiMedium.ttc"
     text_fore_color: Optional[str] = "#FFFFFF"
-    text_background_color: Optional[str] = "transparent"
+    text_background_color: Union[bool, str] = True
     font_size: int = 60
     stroke_color: Optional[str] = "#000000"
     stroke_width: float = 1.5
diff --git a/app/services/video.py b/app/services/video.py
index a1070063..afa77670 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -1,9 +1,10 @@
 import glob
+import os
 import random
 from typing import List
 
 from loguru import logger
-from moviepy.editor import *
+from moviepy import *
 from moviepy.video.tools.subtitles import SubtitlesClip
 from PIL import ImageFont
 
@@ -60,7 +61,7 @@ def combine_videos(
         while start_time < clip_duration:
             end_time = min(start_time + max_clip_duration, clip_duration)
-            split_clip = clip.subclip(start_time, end_time)
+            split_clip = clip.subclipped(start_time, end_time)
             raw_clips.append(split_clip)
             # logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}")
             start_time = end_time
@@ -76,11 +77,11 @@ def combine_videos(
     for clip in raw_clips:
         # Check if clip is longer than the remaining audio
         if (audio_duration - video_duration) < clip.duration:
-            clip = clip.subclip(0, (audio_duration - video_duration))
+            clip = clip.subclipped(0, (audio_duration - video_duration))
         # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
         elif req_dur < clip.duration:
-            clip = clip.subclip(0, req_dur)
-            clip = clip.set_fps(30)
+            clip = clip.subclipped(0, req_dur)
+            clip = clip.with_fps(30)
 
         # Not all videos are same size, so we need to resize them
         clip_w, clip_h = clip.size
@@ -90,7 +91,7 @@ def combine_videos(
         if clip_ratio == video_ratio:
             # scale proportionally
-            clip = clip.resize((video_width, video_height))
+            clip = clip.resized((video_width, video_height))
         else:
             # scale the video proportionally
             if clip_ratio > video_ratio:
@@ -102,15 +103,15 @@ def combine_videos(
             new_width = int(clip_w * scale_factor)
             new_height = int(clip_h * scale_factor)
-            clip_resized = clip.resize(newsize=(new_width, new_height))
+            clip_resized = clip.resized(new_size=(new_width, new_height))
 
             background = ColorClip(
                 size=(video_width, video_height), color=(0, 0, 0)
             )
             clip = CompositeVideoClip(
                 [
-                    background.set_duration(clip.duration),
-                    clip_resized.set_position("center"),
+                    background.with_duration(clip.duration),
+                    clip_resized.with_position("center"),
                 ]
             )
@@ -119,13 +120,13 @@ def combine_videos(
         )
 
         if clip.duration > max_clip_duration:
-            clip = clip.subclip(0, max_clip_duration)
+            clip = clip.subclipped(0, max_clip_duration)
         clips.append(clip)
         video_duration += clip.duration
 
     video_clip = concatenate_videoclips(clips)
-    video_clip = video_clip.set_fps(30)
+    video_clip = video_clip.with_fps(30)
     logger.info("writing")
     # https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
     video_clip.write_videofile(
@@ -231,29 +232,30 @@ def generate_video(
     logger.info(f"using font: {font_path}")
 
     def create_text_clip(subtitle_item):
+        params.font_size = int(params.font_size)
+        params.stroke_width = int(params.stroke_width)
         phrase = subtitle_item[1]
         max_width = video_width * 0.9
         wrapped_txt, txt_height = wrap_text(
             phrase, max_width=max_width, font=font_path, fontsize=params.font_size
         )
         _clip = TextClip(
-            wrapped_txt,
+            text=wrapped_txt,
             font=font_path,
-            fontsize=params.font_size,
+            font_size=params.font_size,
             color=params.text_fore_color,
             bg_color=params.text_background_color,
             stroke_color=params.stroke_color,
             stroke_width=params.stroke_width,
-            print_cmd=False,
         )
         duration = subtitle_item[0][1] - subtitle_item[0][0]
-        _clip = _clip.set_start(subtitle_item[0][0])
-        _clip = _clip.set_end(subtitle_item[0][1])
-        _clip = _clip.set_duration(duration)
+        _clip = _clip.with_start(subtitle_item[0][0])
+        _clip = _clip.with_end(subtitle_item[0][1])
+        _clip = _clip.with_duration(duration)
         if params.subtitle_position == "bottom":
-            _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
+            _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
         elif params.subtitle_position == "top":
-            _clip = _clip.set_position(("center", video_height * 0.05))
+            _clip = _clip.with_position(("center", video_height * 0.05))
         elif params.subtitle_position == "custom":
             # make sure the subtitle stays fully on screen
             margin = 10  # extra margin, in pixels
@@ -261,16 +263,25 @@ def create_text_clip(subtitle_item):
             min_y = margin
             custom_y = (video_height - _clip.h) * (params.custom_position / 100)
             custom_y = max(min_y, min(custom_y, max_y))  # clamp y to the valid range
-            _clip = _clip.set_position(("center", custom_y))
+            _clip = _clip.with_position(("center", custom_y))
         else:  # center
-            _clip = _clip.set_position(("center", "center"))
+            _clip = _clip.with_position(("center", "center"))
         return _clip
 
     video_clip = VideoFileClip(video_path)
-    audio_clip = AudioFileClip(audio_path).volumex(params.voice_volume)
+    audio_clip = AudioFileClip(audio_path).with_effects(
+        [afx.MultiplyVolume(params.voice_volume)]
+    )
 
     if subtitle_path and os.path.exists(subtitle_path):
-        sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
+        generator = lambda text: TextClip(
+            text=text,
+            font=font_path,
+            font_size=params.font_size,
+        )
+        sub = SubtitlesClip(
+            subtitles=subtitle_path, encoding="utf-8", make_textclip=generator
+        )
         text_clips = []
         for item in sub.subtitles:
             clip = create_text_clip(subtitle_item=item)
@@ -280,15 +291,18 @@ def create_text_clip(subtitle_item):
     bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
     if bgm_file:
         try:
-            bgm_clip = (
-                AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
+            bgm_clip = AudioFileClip(bgm_file).with_effects(
+                [
+                    afx.MultiplyVolume(params.bgm_volume),
+                    afx.AudioFadeOut(3),
+                    afx.AudioLoop(duration=video_clip.duration),
+                ]
             )
-            bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
             audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
         except Exception as e:
             logger.error(f"failed to add bgm: {str(e)}")
 
-    video_clip = video_clip.set_audio(audio_clip)
+    video_clip = video_clip.with_audio(audio_clip)
     video_clip.write_videofile(
         output_file,
         audio_codec="aac",
@@ -324,14 +338,14 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
             # create an image clip and set its duration to 3 seconds
             clip = (
                 ImageClip(material.url)
-                .set_duration(clip_duration)
-                .set_position("center")
+                .with_duration(clip_duration)
+                .with_position("center")
             )
             # use the resize method to add a zoom effect; the lambda makes the scale vary over time
             # here we zoom gradually from the original size up to 120%
             # t is the current time, clip.duration is the total duration (3 seconds here)
             # note: 1 means 100% size, so 1.2 means 120%
-            zoom_clip = clip.resize(
+            zoom_clip = clip.resized(
                 lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
             )
diff --git a/app/services/voice.py b/app/services/voice.py
index 287e22d7..8a8c89f1 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -302,21 +302,33 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: en-US-AnaNeural
 Gender: Female
 
+Name: en-US-AndrewMultilingualNeural
+Gender: Male
+
 Name: en-US-AndrewNeural
 Gender: Male
 
 Name: en-US-AriaNeural
 Gender: Female
 
+Name: en-US-AvaMultilingualNeural
+Gender: Female
+
 Name: en-US-AvaNeural
 Gender: Female
 
+Name: en-US-BrianMultilingualNeural
+Gender: Male
+
 Name: en-US-BrianNeural
 Gender: Male
 
 Name: en-US-ChristopherNeural
 Gender: Male
 
+Name: en-US-EmmaMultilingualNeural
+Gender: Female
+
 Name: en-US-EmmaNeural
 Gender: Female
@@ -602,12 +614,24 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: it-IT-ElsaNeural
 Gender: Female
 
-Name: it-IT-GiuseppeNeural
+Name: it-IT-GiuseppeMultilingualNeural
 Gender: Male
 
 Name: it-IT-IsabellaNeural
 Gender: Female
 
+Name: iu-Cans-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Cans-CA-TaqqiqNeural
+Gender: Male
+
+Name: iu-Latn-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Latn-CA-TaqqiqNeural
+Gender: Male
+
 Name: ja-JP-KeitaNeural
 Gender: Male
@@ -644,7 +668,7 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: kn-IN-SapnaNeural
 Gender: Female
 
-Name: ko-KR-HyunsuNeural
+Name: ko-KR-HyunsuMultilingualNeural
 Gender: Male
 
 Name: ko-KR-InJoonNeural
@@ -758,7 +782,7 @@ def get_all_azure_voices(filter_locals=None) -> list[str]:
 Name: pt-BR-FranciscaNeural
 Gender: Female
 
-Name: pt-BR-ThalitaNeural
+Name: pt-BR-ThalitaMultilingualNeural
 Gender: Female
 
 Name: pt-PT-DuarteNeural
diff --git a/docs/api.jpg b/docs/api.jpg
index e9a41225..769dc796 100644
Binary files a/docs/api.jpg and b/docs/api.jpg differ
diff --git a/docs/webui-en.jpg b/docs/webui-en.jpg
index d68245cd..aa4c735a 100644
Binary files a/docs/webui-en.jpg and b/docs/webui-en.jpg differ
diff --git a/docs/webui.jpg b/docs/webui.jpg
index 387102f3..4ddea7a9 100644
Binary files a/docs/webui.jpg and b/docs/webui.jpg differ
diff --git a/requirements.txt b/requirements.txt
index 25da97c0..3cc84abd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,26 +1,14 @@
-requests~=2.31.0
-moviepy==2.0.0.dev2
-openai~=1.13.3
-faster-whisper~=1.0.1
-edge_tts~=6.1.10
-uvicorn~=0.27.1
-fastapi~=0.110.0
-tomli~=2.0.1
-streamlit~=1.33.0
-loguru~=0.7.2
-aiohttp~=3.9.3
-urllib3~=2.2.1
-pillow~=10.3.0
-pydantic~=2.6.3
-g4f~=0.3.0.4
-dashscope~=1.15.0
-google.generativeai~=0.4.1
-python-multipart~=0.0.9
-redis==5.0.3
-# if you use pillow~=10.3.0, you will get "PIL.Image' has no attribute 'ANTIALIAS'" error when resize video
-# please install opencv-python to fix "PIL.Image' has no attribute 'ANTIALIAS'" error
-opencv-python~=4.9.0.80
-# for azure speech
-# https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
-azure-cognitiveservices-speech~=1.37.0
-git-changelog~=2.5.2
+moviepy==2.1.1
+streamlit==1.40.2
+edge_tts==6.1.19
+fastapi==0.115.6
+uvicorn==0.32.1
+openai==1.56.1
+faster-whisper==1.1.0
+loguru==0.7.2
+google.generativeai==0.8.3
+dashscope==1.20.14
+g4f==0.3.8.1
+azure-cognitiveservices-speech==1.41.1
+redis==5.2.0
+python-multipart==0.0.19
\ No newline at end of file
diff --git a/webui/Main.py b/webui/Main.py
index d3138f78..a60431e1 100644
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -479,7 +479,7 @@ def save_keys_to_config(cfg_key, value):
         st.session_state["video_terms"] = ", ".join(terms)
 
     params.video_terms = st.text_area(
-        tr("Video Keywords"), value=st.session_state["video_terms"], height=50
+        tr("Video Keywords"), value=st.session_state["video_terms"]
     )
 
 with middle_panel:
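
The core of this patch is the MoviePy 1.x to 2.x migration: the `moviepy.editor` entry point is gone (plain `from moviepy import *` replaces it), and the mutating-style methods are renamed (`subclip` → `subclipped`, `resize` → `resized`, every `set_*` → `with_*`), with each call returning a new clip. A minimal sketch of the rename pattern; `input.mp4` is a placeholder path, not a file from this repo:

```python
from moviepy import ColorClip, CompositeVideoClip, VideoFileClip

clip = VideoFileClip("input.mp4")   # placeholder path
clip = clip.subclipped(0, 5)        # 1.x: clip.subclip(0, 5)
clip = clip.resized((1080, 1920))   # 1.x: clip.resize(...)
clip = clip.with_fps(30)            # 1.x: clip.set_fps(30)

# Letterbox onto a black background, as combine_videos does above.
background = ColorClip(size=(1080, 1920), color=(0, 0, 0))
composite = CompositeVideoClip(
    [
        background.with_duration(clip.duration),  # 1.x: set_duration
        clip.with_position("center"),             # 1.x: set_position
    ]
)
```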
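Audio follows the same pattern: `volumex`, `audio_fadeout`, and `afx.audio_loop` are replaced by effect classes passed to `with_effects`. A sketch under assumed inputs (`voice.mp3`, `bgm.mp3`, and both volume factors are placeholders); note that the background track must be scaled by the BGM volume, which is why the bgm hunk above uses `params.bgm_volume`:

```python
from moviepy import AudioFileClip, CompositeAudioClip, afx

voice = AudioFileClip("voice.mp3").with_effects(  # placeholder path
    [afx.MultiplyVolume(1.0)]                     # 1.x: .volumex(1.0)
)
bgm = AudioFileClip("bgm.mp3").with_effects(
    [
        afx.MultiplyVolume(0.2),                 # scale by the *BGM* volume
        afx.AudioFadeOut(3),                     # 1.x: .audio_fadeout(3)
        afx.AudioLoop(duration=voice.duration),  # 1.x: afx.audio_loop(...)
    ]
)
mixed = CompositeAudioClip([voice, bgm])
```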
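`TextClip` in 2.x takes keyword arguments (`text=` and `font_size=` instead of positional text and `fontsize=`) and drops `print_cmd`, while `SubtitlesClip` accepts a `make_textclip` callback to render each cue, which is what the `generator` lambda above supplies. A sketch assuming a placeholder font file and subtitle path:

```python
from moviepy import TextClip
from moviepy.video.tools.subtitles import SubtitlesClip

FONT = "STHeitiMedium.ttc"  # placeholder: 2.x expects a font *file* path


def make_textclip(text):
    # font_size and stroke_width must be ints in 2.x, which is why
    # create_text_clip above adds the int() casts.
    return TextClip(text=text, font=FONT, font_size=60)


sub = SubtitlesClip(
    subtitles="subtitle.srt",  # placeholder .srt path
    encoding="utf-8",
    make_textclip=make_textclip,
)
```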
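The schema change widens `text_background_color` from the `"transparent"` string to `Union[bool, str]`, so the field can carry either a boolean toggle (`True` meaning "draw a background") or an explicit color string. A minimal pydantic sketch; `SubtitleStyle` is a made-up model name for illustration:

```python
from typing import Union

from pydantic import BaseModel


class SubtitleStyle(BaseModel):
    # True -> draw a background, False -> none, or an explicit color string,
    # mirroring the VideoParams/SubtitleRequest fields above
    text_background_color: Union[bool, str] = True


print(SubtitleStyle().text_background_color)                    # True
print(SubtitleStyle(text_background_color="#000000").text_background_color)  # #000000
```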