fix spelling mistakes

main
lipku 4 months ago
parent ac42a340b5
commit 2e0e5d8330

@@ -44,7 +44,7 @@ For setting up the linux cuda environment, you can refer to this article https:/
 ## 2. Quick Start
 - Download the models
-Baidu Cloud Disk <https://pan.baidu.com/s/1yOsQ06-RIDTJd3HFCw4wtA> Password: ltua
+Quark Cloud Disk <https://pan.quark.cn/s/83a750323ef0>
 Google Drive <https://drive.google.com/drive/folders/1FOC_MD6wdogyyX_7V1d4NDIO7P9NlSAJ?usp=sharing>
 Copy wav2lip256.pth to the models folder of this project and rename it to wav2lip.pth;
 Extract wav2lip256_avatar1.tar.gz and copy the entire folder to the data/avatars folder of this project.

@@ -45,7 +45,7 @@ For setting up the linux cuda environment, you can refer to this article https://zhuanlan.zhihu.com/p/6749
 ## 2. Quick Start
 - Download the models
-Baidu Cloud Disk <https://pan.baidu.com/s/1yOsQ06-RIDTJd3HFCw4wtA> Password: ltua
+Quark Cloud Disk <https://pan.quark.cn/s/83a750323ef0>
 Google Drive <https://drive.google.com/drive/folders/1FOC_MD6wdogyyX_7V1d4NDIO7P9NlSAJ?usp=sharing>
 Copy wav2lip256.pth to the models folder of this project and rename it to wav2lip.pth;
 Extract wav2lip256_avatar1.tar.gz and copy the whole folder to the data/avatars folder of this project.
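The quick-start steps above can also be scripted. A minimal sketch in Python, assuming wav2lip256.pth and wav2lip256_avatar1.tar.gz have already been downloaded into the working directory and that the destination folders match the paths named in the README:

import shutil
import tarfile
from pathlib import Path

project = Path(".")  # assumed to be the project root

# Copy the downloaded checkpoint into models/ and rename it as the README instructs.
models_dir = project / "models"
models_dir.mkdir(parents=True, exist_ok=True)
shutil.copy("wav2lip256.pth", models_dir / "wav2lip.pth")

# Extract the avatar archive so the whole folder ends up under data/avatars/.
avatars_dir = project / "data" / "avatars"
avatars_dir.mkdir(parents=True, exist_ok=True)
with tarfile.open("wav2lip256_avatar1.tar.gz", "r:gz") as tar:
    tar.extractall(avatars_dir)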

@@ -35,7 +35,7 @@ import soundfile as sf
 import av
 from fractions import Fraction
-from ttsreal import EdgeTTS,VoitsTTS,XTTS,CosyVoiceTTS,FishTTS,TencentTTS
+from ttsreal import EdgeTTS,SovitsTTS,XTTS,CosyVoiceTTS,FishTTS,TencentTTS
 from logger import logger
 from tqdm import tqdm
@@ -57,7 +57,7 @@ class BaseReal:
         if opt.tts == "edgetts":
             self.tts = EdgeTTS(opt,self)
         elif opt.tts == "gpt-sovits":
-            self.tts = VoitsTTS(opt,self)
+            self.tts = SovitsTTS(opt,self)
         elif opt.tts == "xtts":
             self.tts = XTTS(opt,self)
         elif opt.tts == "cosyvoice":
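After the rename, the "gpt-sovits" option resolves to SovitsTTS; the old VoitsTTS name no longer exists in ttsreal. For illustration only, the same mapping for the option names visible in this hunk could be written as a table-driven sketch (the project itself keeps the if/elif chain above):

from ttsreal import EdgeTTS, SovitsTTS, XTTS, CosyVoiceTTS

# Sketch only: name-to-class table equivalent to the dispatch shown above.
TTS_CLASSES = {
    "edgetts": EdgeTTS,
    "gpt-sovits": SovitsTTS,   # renamed from VoitsTTS by this commit
    "xtts": XTTS,
    "cosyvoice": CosyVoiceTTS,
}

def make_tts(opt, parent):
    # Instantiate the selected TTS backend with the same (opt, parent) arguments
    # the constructor calls above use.
    return TTS_CLASSES[opt.tts](opt, parent)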

@@ -107,9 +107,9 @@ class EdgeTTS(BaseTTS):
             eventpoint=None
             streamlen -= self.chunk
             if idx==0:
-                eventpoint={'status':'start','text':text,'msgenvent':textevent}
+                eventpoint={'status':'start','text':text,'msgevent':textevent}
             elif streamlen<self.chunk:
-                eventpoint={'status':'end','text':text,'msgenvent':textevent}
+                eventpoint={'status':'end','text':text,'msgevent':textevent}
             self.parent.put_audio_frame(stream[idx:idx+self.chunk],eventpoint)
             idx += self.chunk
         #if streamlen>0:  #skip last frame(not 20ms)
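The same chunking pattern recurs in every TTS class below: the PCM stream is pushed to the parent in fixed-size chunks, and the first and last chunks carry an eventpoint whose msgevent field (the key this commit fixes) echoes the original text event. A minimal standalone sketch of that pattern, with a hypothetical DummyParent standing in for the real player so it can run outside the project:

import numpy as np

class DummyParent:
    """Hypothetical stand-in for the real player; it just records what it receives."""
    def __init__(self):
        self.frames = []
    def put_audio_frame(self, frame, eventpoint=None):
        self.frames.append((frame, eventpoint))

def push_stream(parent, stream, chunk, text, textevent):
    # Walk the stream in fixed-size chunks; tag the first chunk with a 'start'
    # eventpoint and the last one with an 'end' eventpoint, mirroring the loop
    # in the EdgeTTS hunk above (with the corrected 'msgevent' key).
    idx, streamlen = 0, stream.shape[0]
    while streamlen >= chunk:
        eventpoint = None
        streamlen -= chunk
        if idx == 0:
            eventpoint = {'status': 'start', 'text': text, 'msgevent': textevent}
        elif streamlen < chunk:
            eventpoint = {'status': 'end', 'text': text, 'msgevent': textevent}
        parent.put_audio_frame(stream[idx:idx + chunk], eventpoint)
        idx += chunk

parent = DummyParent()
push_stream(parent, np.zeros(16000, np.float32), chunk=320, text="hello", textevent=None)
print(parent.frames[0][1], parent.frames[-1][1])   # start and end eventpoints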
@@ -219,16 +219,16 @@ class FishTTS(BaseTTS):
         while streamlen >= self.chunk:
             eventpoint=None
             if first:
-                eventpoint={'status':'start','text':text,'msgenvent':textevent}
+                eventpoint={'status':'start','text':text,'msgevent':textevent}
                 first = False
             self.parent.put_audio_frame(stream[idx:idx+self.chunk],eventpoint)
             streamlen -= self.chunk
             idx += self.chunk
-        eventpoint={'status':'end','text':text,'msgenvent':textevent}
+        eventpoint={'status':'end','text':text,'msgevent':textevent}
         self.parent.put_audio_frame(np.zeros(self.chunk,np.float32),eventpoint)
 ###########################################################################################
-class VoitsTTS(BaseTTS):
+class SovitsTTS(BaseTTS):
     def txt_to_audio(self,msg):
         text,textevent = msg
         self.stream_tts(
@@ -316,12 +316,12 @@ class VoitsTTS(BaseTTS):
         while streamlen >= self.chunk:
             eventpoint=None
             if first:
-                eventpoint={'status':'start','text':text,'msgenvent':textevent}
+                eventpoint={'status':'start','text':text,'msgevent':textevent}
                 first = False
             self.parent.put_audio_frame(stream[idx:idx+self.chunk],eventpoint)
             streamlen -= self.chunk
             idx += self.chunk
-        eventpoint={'status':'end','text':text,'msgenvent':textevent}
+        eventpoint={'status':'end','text':text,'msgevent':textevent}
         self.parent.put_audio_frame(np.zeros(self.chunk,np.float32),eventpoint)
 ###########################################################################################
@@ -382,12 +382,12 @@ class CosyVoiceTTS(BaseTTS):
         while streamlen >= self.chunk:
             eventpoint=None
             if first:
-                eventpoint={'status':'start','text':text,'msgenvent':textevent}
+                eventpoint={'status':'start','text':text,'msgevent':textevent}
                 first = False
             self.parent.put_audio_frame(stream[idx:idx+self.chunk],eventpoint)
             streamlen -= self.chunk
             idx += self.chunk
-        eventpoint={'status':'end','text':text,'msgenvent':textevent}
+        eventpoint={'status':'end','text':text,'msgevent':textevent}
         self.parent.put_audio_frame(np.zeros(self.chunk,np.float32),eventpoint)
 ###########################################################################################
@@ -505,13 +505,13 @@ class TencentTTS(BaseTTS):
         while streamlen >= self.chunk:
             eventpoint=None
             if first:
-                eventpoint={'status':'start','text':text,'msgenvent':textevent}
+                eventpoint={'status':'start','text':text,'msgevent':textevent}
                 first = False
             self.parent.put_audio_frame(stream[idx:idx+self.chunk],eventpoint)
             streamlen -= self.chunk
             idx += self.chunk
         last_stream = stream[idx:]  #get the remain stream
-        eventpoint={'status':'end','text':text,'msgenvent':textevent}
+        eventpoint={'status':'end','text':text,'msgevent':textevent}
         self.parent.put_audio_frame(np.zeros(self.chunk,np.float32),eventpoint)
 ###########################################################################################
@@ -583,10 +583,10 @@ class XTTS(BaseTTS):
         while streamlen >= self.chunk:
             eventpoint=None
             if first:
-                eventpoint={'status':'start','text':text,'msgenvent':textevent}
+                eventpoint={'status':'start','text':text,'msgevent':textevent}
                 first = False
             self.parent.put_audio_frame(stream[idx:idx+self.chunk],eventpoint)
             streamlen -= self.chunk
             idx += self.chunk
-        eventpoint={'status':'end','text':text,'msgenvent':textevent}
+        eventpoint={'status':'end','text':text,'msgevent':textevent}
         self.parent.put_audio_frame(np.zeros(self.chunk,np.float32),eventpoint)