From d6260aee3ea3b1bba49e265c49339ad8926b22c8 Mon Sep 17 00:00:00 2001 From: lipku Date: Sun, 20 Oct 2024 12:10:44 +0800 Subject: [PATCH] improve video record --- app.py | 11 ++-- basereal.py | 143 ++++++++++++++++++++++++++++++++++------------------ lipreal.py | 6 +-- musereal.py | 8 +-- 4 files changed, 108 insertions(+), 60 deletions(-) diff --git a/app.py b/app.py index 8bb3f1d..791bc55 100644 --- a/app.py +++ b/app.py @@ -244,7 +244,7 @@ async def record(request): sessionid = params.get('sessionid',0) if params['type']=='start_record': # nerfreals[sessionid].put_msg_txt(params['text']) - nerfreals[sessionid].start_recording("data/record_lasted.mp4") + nerfreals[sessionid].start_recording() elif params['type']=='end_record': nerfreals[sessionid].stop_recording() return web.Response( @@ -494,19 +494,22 @@ if __name__ == '__main__': model.eye_areas = test_loader._data.eye_area # we still need test_loader to provide audio features for testing. - for _ in range(opt.max_session): + for k in range(opt.max_session): + opt.sessionid=k nerfreal = NeRFReal(opt, trainer, test_loader) nerfreals.append(nerfreal) elif opt.model == 'musetalk': from musereal import MuseReal print(opt) - for _ in range(opt.max_session): + for k in range(opt.max_session): + opt.sessionid=k nerfreal = MuseReal(opt) nerfreals.append(nerfreal) elif opt.model == 'wav2lip': from lipreal import LipReal print(opt) - for _ in range(opt.max_session): + for k in range(opt.max_session): + opt.sessionid=k nerfreal = LipReal(opt) nerfreals.append(nerfreal) diff --git a/basereal.py b/basereal.py index 8836531..4cb0bf7 100644 --- a/basereal.py +++ b/basereal.py @@ -2,6 +2,7 @@ import math import torch import numpy as np +import subprocess import os import time import cv2 @@ -48,8 +49,9 @@ class BaseReal: self.speaking = False self.recording = False - self.recordq_video = Queue() - self.recordq_audio = Queue() + self._record_video_pipe = None + self._record_audio_pipe = None + self.width = self.height = 0 self.curr_state=0 self.custom_img_cycle = {} @@ -116,63 +118,108 @@ class BaseReal: for key in self.custom_index: self.custom_index[key]=0 - def start_recording(self,path): + def start_recording(self): """开始录制视频""" if self.recording: return + + command = ['ffmpeg', + '-y', '-an', + '-f', 'rawvideo', + '-vcodec','rawvideo', + '-pix_fmt', 'bgr24', #像素格式 + '-s', "{}x{}".format(self.width, self.height), + '-r', str(25), + '-i', '-', + '-pix_fmt', 'yuv420p', + '-vcodec', "h264", + #'-f' , 'flv', + f'temp{self.opt.sessionid}.mp4'] + self._record_video_pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE) + + acommand = ['ffmpeg', + '-y', '-vn', + '-f', 's16le', + #'-acodec','pcm_s16le', + '-ac', '1', + '-ar', '16000', + '-i', '-', + '-acodec', 'aac', + #'-f' , 'wav', + f'temp{self.opt.sessionid}.aac'] + self._record_audio_pipe = subprocess.Popen(acommand, shell=False, stdin=subprocess.PIPE) + self.recording = True - self.recordq_video.queue.clear() - self.recordq_audio.queue.clear() - self.container = av.open(path, mode="w") + # self.recordq_video.queue.clear() + # self.recordq_audio.queue.clear() + # self.container = av.open(path, mode="w") + + # process_thread = Thread(target=self.record_frame, args=()) + # process_thread.start() - process_thread = Thread(target=self.record_frame, args=()) - process_thread.start() + def record_video_data(self,image): + if self.width == 0: + print("image.shape:",image.shape) + self.height,self.width,_ = image.shape + if self.recording: + self._record_video_pipe.stdin.write(image.tostring()) + + def record_audio_data(self,frame): + if self.recording: + self._record_audio_pipe.stdin.write(frame.tostring()) - def record_frame(self): - videostream = self.container.add_stream("libx264", rate=25) - videostream.codec_context.time_base = Fraction(1, 25) - audiostream = self.container.add_stream("aac") - audiostream.codec_context.time_base = Fraction(1, 16000) - init = True - framenum = 0 - while self.recording: - try: - videoframe = self.recordq_video.get(block=True, timeout=1) - videoframe.pts = framenum #int(round(framenum*0.04 / videostream.codec_context.time_base)) - videoframe.dts = videoframe.pts - if init: - videostream.width = videoframe.width - videostream.height = videoframe.height - init = False - for packet in videostream.encode(videoframe): - self.container.mux(packet) - for k in range(2): - audioframe = self.recordq_audio.get(block=True, timeout=1) - audioframe.pts = int(round((framenum*2+k)*0.02 / audiostream.codec_context.time_base)) - audioframe.dts = audioframe.pts - for packet in audiostream.encode(audioframe): - self.container.mux(packet) - framenum += 1 - except queue.Empty: - print('record queue empty,') - continue - except Exception as e: - print(e) - #break - for packet in videostream.encode(None): - self.container.mux(packet) - for packet in audiostream.encode(None): - self.container.mux(packet) - self.container.close() - self.recordq_video.queue.clear() - self.recordq_audio.queue.clear() - print('record thread stop') + # def record_frame(self): + # videostream = self.container.add_stream("libx264", rate=25) + # videostream.codec_context.time_base = Fraction(1, 25) + # audiostream = self.container.add_stream("aac") + # audiostream.codec_context.time_base = Fraction(1, 16000) + # init = True + # framenum = 0 + # while self.recording: + # try: + # videoframe = self.recordq_video.get(block=True, timeout=1) + # videoframe.pts = framenum #int(round(framenum*0.04 / videostream.codec_context.time_base)) + # videoframe.dts = videoframe.pts + # if init: + # videostream.width = videoframe.width + # videostream.height = videoframe.height + # init = False + # for packet in videostream.encode(videoframe): + # self.container.mux(packet) + # for k in range(2): + # audioframe = self.recordq_audio.get(block=True, timeout=1) + # audioframe.pts = int(round((framenum*2+k)*0.02 / audiostream.codec_context.time_base)) + # audioframe.dts = audioframe.pts + # for packet in audiostream.encode(audioframe): + # self.container.mux(packet) + # framenum += 1 + # except queue.Empty: + # print('record queue empty,') + # continue + # except Exception as e: + # print(e) + # #break + # for packet in videostream.encode(None): + # self.container.mux(packet) + # for packet in audiostream.encode(None): + # self.container.mux(packet) + # self.container.close() + # self.recordq_video.queue.clear() + # self.recordq_audio.queue.clear() + # print('record thread stop') def stop_recording(self): """停止录制视频""" if not self.recording: return - self.recording = False + self.recording = False + self._record_video_pipe.stdin.close() #wait() + self._record_video_pipe.wait() + self._record_audio_pipe.stdin.close() + self._record_audio_pipe.wait() + cmd_combine_audio = f"ffmpeg -y -i temp{self.opt.sessionid}.aac -i temp{self.opt.sessionid}.mp4 -c:v copy -c:a copy data/record.mp4" + os.system(cmd_combine_audio) + #os.remove(output_path) def mirror_index(self,size, index): #size = len(self.coord_list_cycle) diff --git a/lipreal.py b/lipreal.py index d742fdb..968eabc 100644 --- a/lipreal.py +++ b/lipreal.py @@ -230,8 +230,7 @@ class LipReal(BaseReal): image = combine_frame #(outputs['image'] * 255).astype(np.uint8) new_frame = VideoFrame.from_ndarray(image, format="bgr24") asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop) - if self.recording: - self.recordq_video.put(new_frame) + self.record_video_data(image) for audio_frame in audio_frames: frame,type = audio_frame @@ -242,8 +241,7 @@ class LipReal(BaseReal): # if audio_track._queue.qsize()>10: # time.sleep(0.1) asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop) - if self.recording: - self.recordq_audio.put(new_frame) + self.record_audio_data(frame) print('musereal process_frames thread stop') def render(self,quit_event,loop=None,audio_track=None,video_track=None): diff --git a/musereal.py b/musereal.py index b339d42..9ec6b0f 100644 --- a/musereal.py +++ b/musereal.py @@ -261,8 +261,8 @@ class MuseReal(BaseReal): image = combine_frame #(outputs['image'] * 255).astype(np.uint8) new_frame = VideoFrame.from_ndarray(image, format="bgr24") asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop) - if self.recording: - self.recordq_video.put(new_frame) + self.record_video_data(image) + #self.recordq_video.put(new_frame) for audio_frame in audio_frames: frame,type = audio_frame @@ -273,8 +273,8 @@ class MuseReal(BaseReal): # if audio_track._queue.qsize()>10: # time.sleep(0.1) asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop) - if self.recording: - self.recordq_audio.put(new_frame) + self.record_audio_data(frame) + #self.recordq_audio.put(new_frame) print('musereal process_frames thread stop') def render(self,quit_event,loop=None,audio_track=None,video_track=None):