|
|
|
@ -9,15 +9,17 @@ import time
|
|
|
|
|
import torch.nn.functional as F
|
|
|
|
|
import cv2
|
|
|
|
|
|
|
|
|
|
from asrreal import ASR
|
|
|
|
|
from nerfasr import NerfASR
|
|
|
|
|
from ttsreal import EdgeTTS,VoitsTTS,XTTS
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
from av import AudioFrame, VideoFrame
|
|
|
|
|
from basereal import BaseReal
|
|
|
|
|
|
|
|
|
|
class NeRFReal:
|
|
|
|
|
class NeRFReal(BaseReal):
|
|
|
|
|
def __init__(self, opt, trainer, data_loader, debug=True):
|
|
|
|
|
self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
|
|
|
|
|
super().__init__(opt)
|
|
|
|
|
#self.opt = opt # shared with the trainer's opt to support in-place modification of rendering parameters.
|
|
|
|
|
self.W = opt.W
|
|
|
|
|
self.H = opt.H
|
|
|
|
|
|
|
|
|
@ -55,17 +57,11 @@ class NeRFReal:
|
|
|
|
|
#self.ind_index = 0
|
|
|
|
|
#self.ind_num = trainer.model.individual_codes.shape[0]
|
|
|
|
|
|
|
|
|
|
self.customimg_index = 0
|
|
|
|
|
#self.customimg_index = 0
|
|
|
|
|
|
|
|
|
|
# build asr
|
|
|
|
|
self.asr = ASR(opt)
|
|
|
|
|
self.asr = NerfASR(opt,self)
|
|
|
|
|
self.asr.warm_up()
|
|
|
|
|
if opt.tts == "edgetts":
|
|
|
|
|
self.tts = EdgeTTS(opt,self)
|
|
|
|
|
elif opt.tts == "gpt-sovits":
|
|
|
|
|
self.tts = VoitsTTS(opt,self)
|
|
|
|
|
elif opt.tts == "xtts":
|
|
|
|
|
self.tts = XTTS(opt,self)
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
video_path = 'video_stream'
|
|
|
|
@ -124,14 +120,14 @@ class NeRFReal:
|
|
|
|
|
self.asr.pause_talk()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def mirror_index(self, index):
|
|
|
|
|
size = self.opt.customvideo_imgnum
|
|
|
|
|
turn = index // size
|
|
|
|
|
res = index % size
|
|
|
|
|
if turn % 2 == 0:
|
|
|
|
|
return res
|
|
|
|
|
else:
|
|
|
|
|
return size - res - 1
|
|
|
|
|
# def mirror_index(self, index):
|
|
|
|
|
# size = self.opt.customvideo_imgnum
|
|
|
|
|
# turn = index // size
|
|
|
|
|
# res = index % size
|
|
|
|
|
# if turn % 2 == 0:
|
|
|
|
|
# return res
|
|
|
|
|
# else:
|
|
|
|
|
# return size - res - 1
|
|
|
|
|
|
|
|
|
|
def test_step(self,loop=None,audio_track=None,video_track=None):
|
|
|
|
|
|
|
|
|
@ -148,39 +144,57 @@ class NeRFReal:
|
|
|
|
|
# use the live audio stream
|
|
|
|
|
data['auds'] = self.asr.get_next_feat()
|
|
|
|
|
|
|
|
|
|
audiotype = 0
|
|
|
|
|
if self.opt.transport=='rtmp':
|
|
|
|
|
for _ in range(2):
|
|
|
|
|
audiotype1 = 0
|
|
|
|
|
audiotype2 = 0
|
|
|
|
|
#send audio
|
|
|
|
|
for i in range(2):
|
|
|
|
|
frame,type = self.asr.get_audio_out()
|
|
|
|
|
audiotype += type
|
|
|
|
|
if i==0:
|
|
|
|
|
audiotype1 = type
|
|
|
|
|
else:
|
|
|
|
|
audiotype2 = type
|
|
|
|
|
#print(f'[INFO] get_audio_out shape ',frame.shape)
|
|
|
|
|
if self.opt.transport=='rtmp':
|
|
|
|
|
self.streamer.stream_frame_audio(frame)
|
|
|
|
|
else:
|
|
|
|
|
for _ in range(2):
|
|
|
|
|
frame,type = self.asr.get_audio_out()
|
|
|
|
|
audiotype += type
|
|
|
|
|
else: #webrtc
|
|
|
|
|
frame = (frame * 32767).astype(np.int16)
|
|
|
|
|
new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
|
|
|
|
|
new_frame.planes[0].update(frame.tobytes())
|
|
|
|
|
new_frame.sample_rate=16000
|
|
|
|
|
# if audio_track._queue.qsize()>10:
|
|
|
|
|
# time.sleep(0.1)
|
|
|
|
|
asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
|
|
|
|
|
|
|
|
|
|
# if self.opt.transport=='rtmp':
|
|
|
|
|
# for _ in range(2):
|
|
|
|
|
# frame,type = self.asr.get_audio_out()
|
|
|
|
|
# audiotype += type
|
|
|
|
|
# #print(f'[INFO] get_audio_out shape ',frame.shape)
|
|
|
|
|
# self.streamer.stream_frame_audio(frame)
|
|
|
|
|
# else: #webrtc
|
|
|
|
|
# for _ in range(2):
|
|
|
|
|
# frame,type = self.asr.get_audio_out()
|
|
|
|
|
# audiotype += type
|
|
|
|
|
# frame = (frame * 32767).astype(np.int16)
|
|
|
|
|
# new_frame = AudioFrame(format='s16', layout='mono', samples=frame.shape[0])
|
|
|
|
|
# new_frame.planes[0].update(frame.tobytes())
|
|
|
|
|
# new_frame.sample_rate=16000
|
|
|
|
|
# # if audio_track._queue.qsize()>10:
|
|
|
|
|
# # time.sleep(0.1)
|
|
|
|
|
# asyncio.run_coroutine_threadsafe(audio_track._queue.put(new_frame), loop)
|
|
|
|
|
#t = time.time()
|
|
|
|
|
if self.opt.customvideo and audiotype!=0:
|
|
|
|
|
self.loader = iter(self.data_loader) #init
|
|
|
|
|
imgindex = self.mirror_index(self.customimg_index)
|
|
|
|
|
if audiotype1!=0 and audiotype2!=0 and self.custom_index.get(audiotype1) is not None: #不为推理视频并且有自定义视频
|
|
|
|
|
mirindex = self.mirror_index(len(self.custom_img_cycle[audiotype1]),self.custom_index[audiotype1])
|
|
|
|
|
#imgindex = self.mirror_index(self.customimg_index)
|
|
|
|
|
#print('custom img index:',imgindex)
|
|
|
|
|
image = cv2.imread(os.path.join(self.opt.customvideo_img, str(int(imgindex))+'.png'))
|
|
|
|
|
#image = cv2.imread(os.path.join(self.opt.customvideo_img, str(int(imgindex))+'.png'))
|
|
|
|
|
image = self.custom_img_cycle[audiotype1][mirindex]
|
|
|
|
|
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
|
|
|
self.custom_index[audiotype1] += 1
|
|
|
|
|
if self.opt.transport=='rtmp':
|
|
|
|
|
self.streamer.stream_frame(image)
|
|
|
|
|
else:
|
|
|
|
|
new_frame = VideoFrame.from_ndarray(image, format="rgb24")
|
|
|
|
|
asyncio.run_coroutine_threadsafe(video_track._queue.put(new_frame), loop)
|
|
|
|
|
self.customimg_index += 1
|
|
|
|
|
else:
|
|
|
|
|
self.customimg_index = 0
|
|
|
|
|
else: #推理视频+贴回
|
|
|
|
|
outputs = self.trainer.test_gui_with_data(data, self.W, self.H)
|
|
|
|
|
#print('-------ernerf time: ',time.time()-t)
|
|
|
|
|
#print(f'[INFO] outputs shape ',outputs['image'].shape)
|
|
|
|
@ -213,6 +227,8 @@ class NeRFReal:
|
|
|
|
|
#if self.opt.asr:
|
|
|
|
|
# self.asr.warm_up()
|
|
|
|
|
|
|
|
|
|
self.init_customindex()
|
|
|
|
|
|
|
|
|
|
if self.opt.transport=='rtmp':
|
|
|
|
|
from rtmp_streaming import StreamerConfig, Streamer
|
|
|
|
|
fps=25
|
|
|
|
|