From a55de002fa0bf156b4b76b91bb438ca4a3750fbb Mon Sep 17 00:00:00 2001 From: Marstaos <72695763+Marstaos@users.noreply.github.com> Date: Sun, 13 Apr 2025 18:53:44 +0800 Subject: [PATCH] Fix/smooth mouth (#412) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 1. 修复了musetalk方案中,当数字人说话状态变化时,嘴部画面跳变问题; 2. 新增现代美观的前端dashboard.html,集成了对话与朗读功能; 3. 修复了“'weights_only' is an invalid keyword argument for load()”报错。 * bugfix:修复视频连接状态不更新的bug * feature:新增可选是否启用musereal中的混合过渡选项 * 参照fix log修复log --------- Co-authored-by: marstaos --- .gitignore | 6 +++++- musereal.py | 59 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 73a0187..706ba7d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,10 @@ pretrained .DS_Store workspace/log_ngp.txt .idea +keep_gpu.py models/ *.log -dist \ No newline at end of file +prepared_data/ +models/ +*.log +dist diff --git a/musereal.py b/musereal.py index 72c8758..b2aff58 100644 --- a/musereal.py +++ b/musereal.py @@ -267,12 +267,14 @@ class MuseReal(BaseReal): def process_frames(self,quit_event,loop=None,audio_track=None,video_track=None): - # 新增状态跟踪变量 - self.last_speaking = False - self.transition_start = time.time() - self.transition_duration = 0.1 # 过渡时间 - self.last_silent_frame = None # 静音帧缓存 - self.last_speaking_frame = None # 说话帧缓存 + enable_transition = True # 设置为False禁用过渡效果,True启用 + + if enable_transition: + self.last_speaking = False + self.transition_start = time.time() + self.transition_duration = 0.1 # 过渡时间 + self.last_silent_frame = None # 静音帧缓存 + self.last_speaking_frame = None # 说话帧缓存 while not quit_event.is_set(): try: @@ -280,13 +282,14 @@ class MuseReal(BaseReal): except queue.Empty: continue - # 检测状态变化 - current_speaking = not (audio_frames[0][1]!=0 and audio_frames[1][1]!=0) - if current_speaking != self.last_speaking: - logger.info(f"状态切换:{'说话' if self.last_speaking else '静音'} → {'说话' if 
current_speaking else '静音'}") - self.transition_start = time.time() - self.last_speaking = current_speaking - + if enable_transition: + # 检测状态变化 + current_speaking = not (audio_frames[0][1]!=0 and audio_frames[1][1]!=0) + if current_speaking != self.last_speaking: + logger.info(f"状态切换:{'说话' if self.last_speaking else '静音'} → {'说话' if current_speaking else '静音'}") + self.transition_start = time.time() + self.last_speaking = current_speaking + if audio_frames[0][1]!=0 and audio_frames[1][1]!=0: self.speaking = False audiotype = audio_frames[0][1] @@ -297,14 +300,17 @@ class MuseReal(BaseReal): else: target_frame = self.frame_list_cycle[idx] - # 说话→静音过渡 - if time.time() - self.transition_start < self.transition_duration and self.last_speaking_frame is not None: - alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration) - combine_frame = cv2.addWeighted(self.last_speaking_frame, 1-alpha, target_frame, alpha, 0) + if enable_transition: + # 说话→静音过渡 + if time.time() - self.transition_start < self.transition_duration and self.last_speaking_frame is not None: + alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration) + combine_frame = cv2.addWeighted(self.last_speaking_frame, 1-alpha, target_frame, alpha, 0) + else: + combine_frame = target_frame + # 缓存静音帧 + self.last_silent_frame = combine_frame.copy() else: combine_frame = target_frame - # 缓存静音帧 - self.last_silent_frame = combine_frame.copy() else: self.speaking = True bbox = self.coord_list_cycle[idx] @@ -318,15 +324,18 @@ class MuseReal(BaseReal): mask = self.mask_list_cycle[idx] mask_crop_box = self.mask_coords_list_cycle[idx] - # 静音→说话过渡 current_frame = get_image_blending(ori_frame,res_frame,bbox,mask,mask_crop_box) - if time.time() - self.transition_start < self.transition_duration and self.last_silent_frame is not None: - alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration) - combine_frame = cv2.addWeighted(self.last_silent_frame, 
1-alpha, current_frame, alpha, 0) + if enable_transition: + # 静音→说话过渡 + if time.time() - self.transition_start < self.transition_duration and self.last_silent_frame is not None: + alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration) + combine_frame = cv2.addWeighted(self.last_silent_frame, 1-alpha, current_frame, alpha, 0) + else: + combine_frame = current_frame + # 缓存说话帧 + self.last_speaking_frame = combine_frame.copy() else: combine_frame = current_frame - # 缓存说话帧 - self.last_speaking_frame = combine_frame.copy() image = combine_frame new_frame = VideoFrame.from_ndarray(image, format="bgr24")