From f1c1f5f514c0a7bb2616a804d1f924c1bd40d03f Mon Sep 17 00:00:00 2001
From: fanpt <320622572@qq.com>
Date: Wed, 12 Jun 2024 13:21:55 +0800
Subject: [PATCH] Wrap the video generation feature in an API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
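Notes: below is a minimal smoke test for the two FastAPI endpoints added in
main.py; it is a sketch, not part of the patch. It assumes the service is
running on localhost:8000 (the uvicorn call at the bottom of main.py), that
face.png is a placeholder portrait supplied by the caller, and that the
dynamic/ and silent/ upload directories as well as the audio and reference
video assets under examples/ already exist on the server.

    # smoke_test.py -- hypothetical client, not part of the patch
    import requests

    BASE = "http://localhost:8000"

    # /dynamic-video returns the enhanced talking-head video with its audio
    # track stripped; /silent-video returns it with its last 2 seconds cut.
    for endpoint, outfile in [("dynamic-video", "dynamic.mp4"),
                              ("silent-video", "silent.mp4")]:
        with open("face.png", "rb") as f:  # face.png: any test portrait
            resp = requests.post(f"{BASE}/{endpoint}", files={"image": f})
        if resp.headers.get("content-type") == "video/mp4":
            with open(outfile, "wb") as out:
                out.write(resp.content)
        else:
            print(endpoint, "failed:", resp.json())  # {"error": ...}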
action="store_true", default=True, help="can crop back to the orginal videos for the full body aniamtion") + parser.add_argument("--still", action="store_true", default=True, help="can crop back to the orginal videos for the full body aniamtion") parser.add_argument("--preprocess", default='full', choices=['crop', 'resize', 'full'], help="how to preprocess the images" ) # net structure and parameters diff --git a/main.py b/main.py new file mode 100644 index 0000000..1473b08 --- /dev/null +++ b/main.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +import os +import subprocess +from fastapi import FastAPI, File, UploadFile +from fastapi.responses import FileResponse +from datetime import datetime + +app = FastAPI() + +def save_upload_file(upload_file: UploadFile, filename: str): + with open(filename, "wb") as buffer: + buffer.write(upload_file.file.read()) + +def generate_video_command(result_dir: str, img_path: str, audio_path: str, video_path: str): + return [ + "python", "script.py", + "--source_image", img_path, + "--result_dir", result_dir, + "--driven_audio", audio_path, + "--ref_eyeblink", video_path, + ] + +def get_latest_sub_dir(result_dir: str): + sub_dirs = [os.path.join(result_dir, d) for d in os.listdir(result_dir) if os.path.isdir(os.path.join(result_dir, d))] + if not sub_dirs: + return None + return max(sub_dirs, key=os.path.getmtime) + +def get_video_duration(video_path: str): + video_duration_command = [ + "ffprobe", + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + video_path + ] + result = subprocess.run(video_duration_command, capture_output=True, text=True) + return float(result.stdout.strip()) + +def trim_video(input_video_path: str, output_video_path: str, duration: float): + trim_command = [ + "ffmpeg", + "-i", input_video_path, + "-t", str(duration - 2), + "-c", "copy", + output_video_path + ] + subprocess.run(trim_command, check=True) + +def remove_audio(input_video_path: str, output_video_path: str): + remove_audio_command = [ + "ffmpeg", + "-i", input_video_path, + "-an", + "-vcodec", "copy", + output_video_path + ] + subprocess.run(remove_audio_command, check=True) + +@app.post("/dynamic-video") +async def generate_video( + image: UploadFile = File(...), +): + img_path = "dynamic/dynamic_image.png" + save_upload_file(image, img_path) + + audio_path = "./examples/driven_audio/dynamic_audio.wav" + + video_path = "./examples/ref_video/dynamic.mp4" + + result_dir = os.path.join("results") + os.makedirs(result_dir, exist_ok=True) + + command = generate_video_command(result_dir, img_path, audio_path, video_path) + subprocess.run(command, check=True) + + latest_sub_dir = get_latest_sub_dir(result_dir) + if not latest_sub_dir: + return {"error": "No subdirectory found in result directory"} + + result_video_path = os.path.join(latest_sub_dir, "dynamic_image##dynamic_audio_enhanced.mp4") + silent_video_path = os.path.join(latest_sub_dir, "dynamic_image##dynamic_audio_enhanced_dynamic.mp4") + + if os.path.exists(result_video_path): + remove_audio(result_video_path, silent_video_path) + return FileResponse(silent_video_path, media_type='video/mp4') + else: + return {"error": "Video file not found"} + +@app.post("/silent-video") +async def generate_and_trim_video( + image: UploadFile = File(...), +): + img_path = "silent/silent_image.png" + save_upload_file(image, img_path) + + audio_path = "./examples/driven_audio/silent_audio.wav" + + video_path = "./examples/ref_video/silent.mp4" + + result_dir = 
os.path.join("results") + os.makedirs(result_dir, exist_ok=True) + + command = generate_video_command(result_dir, img_path, audio_path, video_path) + subprocess.run(command, check=True) + + latest_sub_dir = get_latest_sub_dir(result_dir) + if not latest_sub_dir: + return {"error": "No subdirectory found in result directory"} + + result_video_path = os.path.join(latest_sub_dir, "silent_image##silent_audio_enhanced.mp4") + trimmed_video_path = os.path.join(latest_sub_dir, "silent_image##silent_audio_enhanced_trimmed.mp4") + + if os.path.exists(result_video_path): + video_duration = get_video_duration(result_video_path) + trim_video(result_video_path, trimmed_video_path, video_duration) + return FileResponse(trimmed_video_path, media_type='video/mp4') + else: + return {"error": "Video file not found"} + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/src/utils/videoio.py b/src/utils/videoio.py index 5399891..21202cf 100644 --- a/src/utils/videoio.py +++ b/src/utils/videoio.py @@ -17,7 +17,7 @@ def load_video_to_cv2(input_path): full_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) return full_frames -def save_video_with_watermark(video, audio, save_path, watermark='docs/sadtalker_logo.png'): +def save_video_with_watermark(video, audio, save_path, watermark=None): temp_file = str(uuid.uuid4())+'.mp4' cmd = r'ffmpeg -y -i "%s" -i "%s" -vcodec copy "%s"' % (video, audio, temp_file) os.system(cmd) diff --git a/text_wav.py b/text_wav.py new file mode 100644 index 0000000..2a7d93b --- /dev/null +++ b/text_wav.py @@ -0,0 +1,50 @@ +import base64 +import requests +import wave + +def save_audio_to_file(output_file, audio_data): + with wave.open(output_file, 'wb') as wave_file: + wave_file.setnchannels(1) # 设置为单声道 + wave_file.setsampwidth(2) # 设置样本宽度(以字节为单位,2表示16位) + wave_file.setframerate(44100) # 设置帧速率 + wave_file.writeframes(audio_data) + +def text_to_speech(text, output_file): + # 请求参数 + request_data = { + "text": text, + "spk_id": 0, + # 语速 + "speed": 0.87, + "volume": 1.0, + "sample_rate": 0, + "save_path": output_file + } + + # 发送POST请求 + response = requests.post("http://192.168.10.138:8090/paddlespeech/tts", json=request_data) + + # 解析返回的JSON + response_json = response.json() + + if response_json["success"]: + # 获取返回的音频base64编码 + base64_audio = response_json["result"]["audio"] + + # 将音频保存到文件 + with open(output_file, 'wb') as wave_file: + wave_file.write(base64.b64decode(base64_audio)) + else: + print("TTS request failed:", response_json["message"]["description"]) + +# 要发送的语句列表 +statements = [ + "《庆余年》根据猫腻的小说改编,讲述了少年林殊在乱世中成长的故事。他凭借智慧和勇气,卷入复杂的政治斗争,结识志同道合的盟友和强大的对手,经历挑战与考验,最终成为能左右局势的重要人物。剧中不仅有紧张刺激的情节,还探讨了权力、正义与人性的复杂关系,深受观众喜爱。" +] + +# 保存语音的文件名列表 +output_files = [f"E:\\SadTalker\\temp\\wav\\tts_result_{i}.wav" for i in range(len(statements))] + +# 发送每个语句的请求并保存音频文件 +for i in range(len(statements)): + text_to_speech(statements[i], output_files[i])