Fix/smooth mouth (#412)

* 1. 修复了musetalk方案中,当数字人说话状态变化时,嘴部画面跳变问题;
2. 新增现代美观的前端dashboard.html,集成了对话与朗读功能;
3. 修复了“'weights_only' is an invalid keyword argument for load()”报错。

* bugfix:修复视频连接状态不更新的bug

* feature:新增选项,可选择是否启用musereal中的混合过渡效果

* 参照fix log修复log

---------

Co-authored-by: marstaos <liu.marstaos@outlook.com>
main
Marstaos 4 months ago committed by GitHub
parent 2e0e5d8330
commit a55de002fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

4
.gitignore vendored

@ -16,6 +16,10 @@ pretrained
.DS_Store .DS_Store
workspace/log_ngp.txt workspace/log_ngp.txt
.idea .idea
keep_gpu.py
models/
*.log
prepared_data/
models/ models/
*.log *.log
dist dist

@ -267,12 +267,14 @@ class MuseReal(BaseReal):
def process_frames(self,quit_event,loop=None,audio_track=None,video_track=None): def process_frames(self,quit_event,loop=None,audio_track=None,video_track=None):
# 新增状态跟踪变量 enable_transition = True # 设置为False禁用过渡效果True启用
self.last_speaking = False
self.transition_start = time.time() if enable_transition:
self.transition_duration = 0.1 # 过渡时间 self.last_speaking = False
self.last_silent_frame = None # 静音帧缓存 self.transition_start = time.time()
self.last_speaking_frame = None # 说话帧缓存 self.transition_duration = 0.1 # 过渡时间
self.last_silent_frame = None # 静音帧缓存
self.last_speaking_frame = None # 说话帧缓存
while not quit_event.is_set(): while not quit_event.is_set():
try: try:
@ -280,12 +282,13 @@ class MuseReal(BaseReal):
except queue.Empty: except queue.Empty:
continue continue
# 检测状态变化 if enable_transition:
current_speaking = not (audio_frames[0][1]!=0 and audio_frames[1][1]!=0) # 检测状态变化
if current_speaking != self.last_speaking: current_speaking = not (audio_frames[0][1]!=0 and audio_frames[1][1]!=0)
logger.info(f"状态切换:{'说话' if self.last_speaking else '静音'}{'说话' if current_speaking else '静音'}") if current_speaking != self.last_speaking:
self.transition_start = time.time() logger.info(f"状态切换:{'说话' if self.last_speaking else '静音'}{'说话' if current_speaking else '静音'}")
self.last_speaking = current_speaking self.transition_start = time.time()
self.last_speaking = current_speaking
if audio_frames[0][1]!=0 and audio_frames[1][1]!=0: if audio_frames[0][1]!=0 and audio_frames[1][1]!=0:
self.speaking = False self.speaking = False
@ -297,14 +300,17 @@ class MuseReal(BaseReal):
else: else:
target_frame = self.frame_list_cycle[idx] target_frame = self.frame_list_cycle[idx]
# 说话→静音过渡 if enable_transition:
if time.time() - self.transition_start < self.transition_duration and self.last_speaking_frame is not None: # 说话→静音过渡
alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration) if time.time() - self.transition_start < self.transition_duration and self.last_speaking_frame is not None:
combine_frame = cv2.addWeighted(self.last_speaking_frame, 1-alpha, target_frame, alpha, 0) alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration)
combine_frame = cv2.addWeighted(self.last_speaking_frame, 1-alpha, target_frame, alpha, 0)
else:
combine_frame = target_frame
# 缓存静音帧
self.last_silent_frame = combine_frame.copy()
else: else:
combine_frame = target_frame combine_frame = target_frame
# 缓存静音帧
self.last_silent_frame = combine_frame.copy()
else: else:
self.speaking = True self.speaking = True
bbox = self.coord_list_cycle[idx] bbox = self.coord_list_cycle[idx]
@ -318,15 +324,18 @@ class MuseReal(BaseReal):
mask = self.mask_list_cycle[idx] mask = self.mask_list_cycle[idx]
mask_crop_box = self.mask_coords_list_cycle[idx] mask_crop_box = self.mask_coords_list_cycle[idx]
# 静音→说话过渡
current_frame = get_image_blending(ori_frame,res_frame,bbox,mask,mask_crop_box) current_frame = get_image_blending(ori_frame,res_frame,bbox,mask,mask_crop_box)
if time.time() - self.transition_start < self.transition_duration and self.last_silent_frame is not None: if enable_transition:
alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration) # 静音→说话过渡
combine_frame = cv2.addWeighted(self.last_silent_frame, 1-alpha, current_frame, alpha, 0) if time.time() - self.transition_start < self.transition_duration and self.last_silent_frame is not None:
alpha = min(1.0, (time.time() - self.transition_start) / self.transition_duration)
combine_frame = cv2.addWeighted(self.last_silent_frame, 1-alpha, current_frame, alpha, 0)
else:
combine_frame = current_frame
# 缓存说话帧
self.last_speaking_frame = combine_frame.copy()
else: else:
combine_frame = current_frame combine_frame = current_frame
# 缓存说话帧
self.last_speaking_frame = combine_frame.copy()
image = combine_frame image = combine_frame
new_frame = VideoFrame.from_ndarray(image, format="bgr24") new_frame = VideoFrame.from_ndarray(image, format="bgr24")

Loading…
Cancel
Save