From a55de002fa0bf156b4b76b91bb438ca4a3750fbb Mon Sep 17 00:00:00 2001
From: Marstaos <72695763+Marstaos@users.noreply.github.com>
Date: Sun, 13 Apr 2025 18:53:44 +0800
Subject: [PATCH] Fix/smooth mouth (#412)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 1. 修复了musetalk方案中，当数字人说话状态变化时，嘴部画面跳变问题；
2. 新增现代美观的前端dashboard.html，集成了对话与朗读功能；
3. 修复了“'weights_only' is an invalid keyword argument for load()”报错。

* bugfix:修复视频连接状态不更新的bug

* feature:新增选项，可选择是否启用musereal中的混合过渡效果

* 参照fix log修复log

---------

Co-authored-by: marstaos <liu.marstaos@outlook.com>
---
 .gitignore  |  6 +++++-
 musereal.py | 59 ++++++++++++++++++++++++++++++-----------------------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/.gitignore b/.gitignore
index 73a0187..706ba7d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,10 @@ pretrained
 .DS_Store
 workspace/log_ngp.txt
 .idea
+keep_gpu.py
 models/
 *.log
-dist
\ No newline at end of file
+prepared_data/
+models/
+*.log
+dist
diff --git a/musereal.py b/musereal.py
index 72c8758..b2aff58 100644
--- a/musereal.py
+++ b/musereal.py
@@ -267,12 +267,14 @@ class MuseReal(BaseReal):
       
 
     def process_frames(self,quit_event,loop=None,audio_track=None,video_track=None):
-        # 新增状态跟踪变量
-        self.last_speaking = False
-        self.transition_start = time.time()
-        self.transition_duration = 0.1  # 过渡时间
-        self.last_silent_frame = None  # 静音帧缓存
-        self.last_speaking_frame = None  # 说话帧缓存
+        enable_transition = True  # 设置为False禁用过渡效果，True启用
+        
+        if enable_transition:
+            self.last_speaking = False
+            self.transition_start = time.time()
+            self.transition_duration = 0.1  # 过渡时间
+            self.last_silent_frame = None  # 静音帧缓存
+            self.last_speaking_frame = None  # 说话帧缓存
         
         while not quit_event.is_set():
             try:
@@ -280,13 +282,14 @@ class MuseReal(BaseReal):
             except queue.Empty:
                 continue
             
-            # 检测状态变化
-            current_speaking = not (audio_frames[0][1]!=0 and audio_frames[1][1]!=0)
-            if current_speaking != self.last_speaking:
-                logger.info(f"状态切换：{'说话' if self.last_speaking else '静音'} → {'说话' if current_speaking else '静音'}")
-                self.transition_start = time.time()
-            self.last_speaking = current_speaking
-            
+            if enable_transition:
+                # 检测状态变化
+                current_speaking = not (audio_frames[0][1]!=0 and audio_frames[1][1]!=0)
+                if current_speaking != self.last_speaking:
+                    logger.info(f"状态切换：{'说话' if self.last_speaking else '静音'} → {'说话' if current_speaking else '静音'}")
+                    self.transition_start = time.time()
+                self.last_speaking = current_speaking
+
             if audio_frames[0][1]!=0 and audio_frames[1][1]!=0: 
                 self.speaking = False
                 audiotype = audio_frames[0][1]
@@ -297,14 +300,17 @@ class MuseReal(BaseReal):
                 else:
                     target_frame = self.frame_list_cycle[idx]
                 
-                # 说话→静音过渡
-                if time.time() - self.transition_start < self.transition_duration and self.last_speaking_frame is not None:
-                    alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration)
-                    combine_frame = cv2.addWeighted(self.last_speaking_frame, 1-alpha, target_frame, alpha, 0)
+                if enable_transition:
+                    # 说话→静音过渡
+                    if time.time() - self.transition_start < self.transition_duration and self.last_speaking_frame is not None:
+                        alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration)
+                        combine_frame = cv2.addWeighted(self.last_speaking_frame, 1-alpha, target_frame, alpha, 0)
+                    else:
+                        combine_frame = target_frame
+                    # 缓存静音帧
+                    self.last_silent_frame = combine_frame.copy()
                 else:
                     combine_frame = target_frame
-                # 缓存静音帧
-                self.last_silent_frame = combine_frame.copy()
             else:
                 self.speaking = True
                 bbox = self.coord_list_cycle[idx]
@@ -318,15 +324,18 @@ class MuseReal(BaseReal):
                 mask = self.mask_list_cycle[idx]
                 mask_crop_box = self.mask_coords_list_cycle[idx]
 
-                # 静音→说话过渡
                 current_frame = get_image_blending(ori_frame,res_frame,bbox,mask,mask_crop_box)
-                if time.time() - self.transition_start < self.transition_duration and self.last_silent_frame is not None:
-                    alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration)
-                    combine_frame = cv2.addWeighted(self.last_silent_frame, 1-alpha, current_frame, alpha, 0)
+                if enable_transition:
+                    # 静音→说话过渡
+                    if time.time() - self.transition_start < self.transition_duration and self.last_silent_frame is not None:
+                        alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration)
+                        combine_frame = cv2.addWeighted(self.last_silent_frame, 1-alpha, current_frame, alpha, 0)
+                    else:
+                        combine_frame = current_frame
+                    # 缓存说话帧
+                    self.last_speaking_frame = combine_frame.copy()
                 else:
                     combine_frame = current_frame
-                # 缓存说话帧
-                self.last_speaking_frame = combine_frame.copy()
 
             image = combine_frame
             new_frame = VideoFrame.from_ndarray(image, format="bgr24")