Improve video recording

main
lipku 9 months ago
parent 14208c6d60
commit d6260aee3e

@ -244,7 +244,7 @@ async def record(request):
sessionid = params.get('sessionid',0) sessionid = params.get('sessionid',0)
if params['type']=='start_record': if params['type']=='start_record':
# nerfreals[sessionid].put_msg_txt(params['text']) # nerfreals[sessionid].put_msg_txt(params['text'])
nerfreals[sessionid].start_recording("data/record_lasted.mp4") nerfreals[sessionid].start_recording()
elif params['type']=='end_record': elif params['type']=='end_record':
nerfreals[sessionid].stop_recording() nerfreals[sessionid].stop_recording()
return web.Response( return web.Response(
@ -494,19 +494,22 @@ if __name__ == '__main__':
model.eye_areas = test_loader._data.eye_area model.eye_areas = test_loader._data.eye_area
# we still need test_loader to provide audio features for testing. # we still need test_loader to provide audio features for testing.
for _ in range(opt.max_session): for k in range(opt.max_session):
opt.sessionid=k
nerfreal = NeRFReal(opt, trainer, test_loader) nerfreal = NeRFReal(opt, trainer, test_loader)
nerfreals.append(nerfreal) nerfreals.append(nerfreal)
elif opt.model == 'musetalk': elif opt.model == 'musetalk':
from musereal import MuseReal from musereal import MuseReal
print(opt) print(opt)
for _ in range(opt.max_session): for k in range(opt.max_session):
opt.sessionid=k
nerfreal = MuseReal(opt) nerfreal = MuseReal(opt)
nerfreals.append(nerfreal) nerfreals.append(nerfreal)
elif opt.model == 'wav2lip': elif opt.model == 'wav2lip':
from lipreal import LipReal from lipreal import LipReal
print(opt) print(opt)
for _ in range(opt.max_session): for k in range(opt.max_session):
opt.sessionid=k
nerfreal = LipReal(opt) nerfreal = LipReal(opt)
nerfreals.append(nerfreal) nerfreals.append(nerfreal)

@ -2,6 +2,7 @@ import math
import torch import torch
import numpy as np import numpy as np
import subprocess
import os import os
import time import time
import cv2 import cv2
@ -48,8 +49,9 @@ class BaseReal:
self.speaking = False self.speaking = False
self.recording = False self.recording = False
self.recordq_video = Queue() self._record_video_pipe = None
self.recordq_audio = Queue() self._record_audio_pipe = None
self.width = self.height = 0
self.curr_state=0 self.curr_state=0
self.custom_img_cycle = {} self.custom_img_cycle = {}
@ -116,63 +118,108 @@ class BaseReal:
for key in self.custom_index: for key in self.custom_index:
self.custom_index[key]=0 self.custom_index[key]=0
def start_recording(self,path): def start_recording(self):
"""开始录制视频""" """开始录制视频"""
if self.recording: if self.recording:
return return
command = ['ffmpeg',
'-y', '-an',
'-f', 'rawvideo',
'-vcodec','rawvideo',
'-pix_fmt', 'bgr24', #像素格式
'-s', "{}x{}".format(self.width, self.height),
'-r', str(25),
'-i', '-',
'-pix_fmt', 'yuv420p',
'-vcodec', "h264",
#'-f' , 'flv',
f'temp{self.opt.sessionid}.mp4']
self._record_video_pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
acommand = ['ffmpeg',
'-y', '-vn',
'-f', 's16le',
#'-acodec','pcm_s16le',
'-ac', '1',
'-ar', '16000',
'-i', '-',
'-acodec', 'aac',
#'-f' , 'wav',
f'temp{self.opt.sessionid}.aac']
self._record_audio_pipe = subprocess.Popen(acommand, shell=False, stdin=subprocess.PIPE)
self.recording = True self.recording = True
self.recordq_video.queue.clear() # self.recordq_video.queue.clear()
self.recordq_audio.queue.clear() # self.recordq_audio.queue.clear()
self.container = av.open(path, mode="w") # self.container = av.open(path, mode="w")
# process_thread = Thread(target=self.record_frame, args=())
# process_thread.start()
process_thread = Thread(target=self.record_frame, args=()) def record_video_data(self,image):
process_thread.start() if self.width == 0:
print("image.shape:",image.shape)
self.height,self.width,_ = image.shape
if self.recording:
self._record_video_pipe.stdin.write(image.tostring())
def record_audio_data(self,frame):
if self.recording:
self._record_audio_pipe.stdin.write(frame.tostring())
def record_frame(self): # def record_frame(self):
videostream = self.container.add_stream("libx264", rate=25) # videostream = self.container.add_stream("libx264", rate=25)
videostream.codec_context.time_base = Fraction(1, 25) # videostream.codec_context.time_base = Fraction(1, 25)
audiostream = self.container.add_stream("aac") # audiostream = self.container.add_stream("aac")
audiostream.codec_context.time_base = Fraction(1, 16000) # audiostream.codec_context.time_base = Fraction(1, 16000)
init = True # init = True
framenum = 0 # framenum = 0
while self.recording: # while self.recording:
try: # try:
videoframe = self.recordq_video.get(block=True, timeout=1) # videoframe = self.recordq_video.get(block=True, timeout=1)
videoframe.pts = framenum #int(round(framenum*0.04 / videostream.codec_context.time_base)) # videoframe.pts = framenum #int(round(framenum*0.04 / videostream.codec_context.time_base))
videoframe.dts = videoframe.pts # videoframe.dts = videoframe.pts
if init: # if init:
videostream.width = videoframe.width # videostream.width = videoframe.width
videostream.height = videoframe.height # videostream.height = videoframe.height
init = False # init = False
for packet in videostream.encode(videoframe): # for packet in videostream.encode(videoframe):
self.container.mux(packet) # self.container.mux(packet)
for k in range(2): # for k in range(2):
audioframe = self.recordq_audio.get(block=True, timeout=1) # audioframe = self.recordq_audio.get(block=True, timeout=1)
audioframe.pts = int(round((framenum*2+k)*0.02 / audiostream.codec_context.time_base)) # audioframe.pts = int(round((framenum*2+k)*0.02 / audiostream.codec_context.time_base))
audioframe.dts = audioframe.pts # audioframe.dts = audioframe.pts
for packet in audiostream.encode(audioframe): # for packet in audiostream.encode(audioframe):
self.container.mux(packet) # self.container.mux(packet)
framenum += 1 # framenum += 1
except queue.Empty: # except queue.Empty:
print('record queue empty,') # print('record queue empty,')
continue # continue
except Exception as e: # except Exception as e:
print(e) # print(e)
#break # #break
for packet in videostream.encode(None): # for packet in videostream.encode(None):
self.container.mux(packet) # self.container.mux(packet)
for packet in audiostream.encode(None): # for packet in audiostream.encode(None):
self.container.mux(packet) # self.container.mux(packet)
self.container.close() # self.container.close()
self.recordq_video.queue.clear() # self.recordq_video.queue.clear()
self.recordq_audio.queue.clear() # self.recordq_audio.queue.clear()
print('record thread stop') # print('record thread stop')
def stop_recording(self): def stop_recording(self):
"""停止录制视频""" """停止录制视频"""
if not self.recording: if not self.recording:
return return
self.recording = False self.recording = False
self._record_video_pipe.stdin.close() #wait()
self._record_video_pipe.wait()
self._record_audio_pipe.stdin.close()
self._record_audio_pipe.wait()
cmd_combine_audio = f"ffmpeg -y -i temp{self.opt.sessionid}.aac -i temp{self.opt.sessionid}.mp4 -c:v copy -c:a copy data/record.mp4"
os.system(cmd_combine_audio)
#os.remove(output_path)
def mirror_index(self,size, index): def mirror_index(self,size, index):
#size = len(self.coord_list_cycle) #size = len(self.coord_list_cycle)

@ -230,8 +230,7 @@ class LipReal(BaseReal):
image = combine_frame #(outputs['image'] * 255).astype(np.uint8) image = combine_frame #(outputs['image'] * 255).astype(np.uint8)
new_frame = VideoFrame.from_ndarray(image, format="bgr24") new_frame = VideoFrame.from_ndarray(image, format="bgr24")
asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop) asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop)
if self.recording: self.record_video_data(image)
self.recordq_video.put(new_frame)
for audio_frame in audio_frames: for audio_frame in audio_frames:
frame,type = audio_frame frame,type = audio_frame
@ -242,8 +241,7 @@ class LipReal(BaseReal):
# if audio_track._queue.qsize()>10: # if audio_track._queue.qsize()>10:
# time.sleep(0.1) # time.sleep(0.1)
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop) asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
if self.recording: self.record_audio_data(frame)
self.recordq_audio.put(new_frame)
print('musereal process_frames thread stop') print('musereal process_frames thread stop')
def render(self,quit_event,loop=None,audio_track=None,video_track=None): def render(self,quit_event,loop=None,audio_track=None,video_track=None):

@ -261,8 +261,8 @@ class MuseReal(BaseReal):
image = combine_frame #(outputs['image'] * 255).astype(np.uint8) image = combine_frame #(outputs['image'] * 255).astype(np.uint8)
new_frame = VideoFrame.from_ndarray(image, format="bgr24") new_frame = VideoFrame.from_ndarray(image, format="bgr24")
asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop) asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop)
if self.recording: self.record_video_data(image)
self.recordq_video.put(new_frame) #self.recordq_video.put(new_frame)
for audio_frame in audio_frames: for audio_frame in audio_frames:
frame,type = audio_frame frame,type = audio_frame
@ -273,8 +273,8 @@ class MuseReal(BaseReal):
# if audio_track._queue.qsize()>10: # if audio_track._queue.qsize()>10:
# time.sleep(0.1) # time.sleep(0.1)
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop) asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
if self.recording: self.record_audio_data(frame)
self.recordq_audio.put(new_frame) #self.recordq_audio.put(new_frame)
print('musereal process_frames thread stop') print('musereal process_frames thread stop')
def render(self,quit_event,loop=None,audio_track=None,video_track=None): def render(self,quit_event,loop=None,audio_track=None,video_track=None):

Loading…
Cancel
Save