|
|
@ -37,7 +37,8 @@ class BaseTTS:
|
|
|
|
self.state = State.PAUSE
|
|
|
|
self.state = State.PAUSE
|
|
|
|
|
|
|
|
|
|
|
|
def put_msg_txt(self,msg):
|
|
|
|
def put_msg_txt(self,msg):
|
|
|
|
self.msgqueue.put(msg)
|
|
|
|
if len(msg)>0:
|
|
|
|
|
|
|
|
self.msgqueue.put(msg)
|
|
|
|
|
|
|
|
|
|
|
|
def render(self,quit_event):
|
|
|
|
def render(self,quit_event):
|
|
|
|
process_thread = Thread(target=self.process_tts, args=(quit_event,))
|
|
|
|
process_thread = Thread(target=self.process_tts, args=(quit_event,))
|
|
|
@ -99,19 +100,22 @@ class EdgeTTS(BaseTTS):
|
|
|
|
return stream
|
|
|
|
return stream
|
|
|
|
|
|
|
|
|
|
|
|
async def __main(self,voicename: str, text: str):
|
|
|
|
async def __main(self,voicename: str, text: str):
|
|
|
|
communicate = edge_tts.Communicate(text, voicename)
|
|
|
|
try:
|
|
|
|
|
|
|
|
communicate = edge_tts.Communicate(text, voicename)
|
|
|
|
#with open(OUTPUT_FILE, "wb") as file:
|
|
|
|
|
|
|
|
first = True
|
|
|
|
#with open(OUTPUT_FILE, "wb") as file:
|
|
|
|
async for chunk in communicate.stream():
|
|
|
|
first = True
|
|
|
|
if first:
|
|
|
|
async for chunk in communicate.stream():
|
|
|
|
first = False
|
|
|
|
if first:
|
|
|
|
if chunk["type"] == "audio" and self.state==State.RUNNING:
|
|
|
|
first = False
|
|
|
|
#self.push_audio(chunk["data"])
|
|
|
|
if chunk["type"] == "audio" and self.state==State.RUNNING:
|
|
|
|
self.input_stream.write(chunk["data"])
|
|
|
|
#self.push_audio(chunk["data"])
|
|
|
|
#file.write(chunk["data"])
|
|
|
|
self.input_stream.write(chunk["data"])
|
|
|
|
elif chunk["type"] == "WordBoundary":
|
|
|
|
#file.write(chunk["data"])
|
|
|
|
pass
|
|
|
|
elif chunk["type"] == "WordBoundary":
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
|
|
|
###########################################################################################
|
|
|
|
###########################################################################################
|
|
|
|
class VoitsTTS(BaseTTS):
|
|
|
|
class VoitsTTS(BaseTTS):
|
|
|
@ -143,28 +147,31 @@ class VoitsTTS(BaseTTS):
|
|
|
|
# req["emotion"] = emotion
|
|
|
|
# req["emotion"] = emotion
|
|
|
|
# #req["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
|
|
|
|
# #req["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
|
|
|
|
# req["streaming_mode"] = True
|
|
|
|
# req["streaming_mode"] = True
|
|
|
|
res = requests.post(
|
|
|
|
try:
|
|
|
|
f"{server_url}/tts",
|
|
|
|
res = requests.post(
|
|
|
|
json=req,
|
|
|
|
f"{server_url}/tts",
|
|
|
|
stream=True,
|
|
|
|
json=req,
|
|
|
|
)
|
|
|
|
stream=True,
|
|
|
|
end = time.perf_counter()
|
|
|
|
)
|
|
|
|
print(f"gpt_sovits Time to make POST: {end-start}s")
|
|
|
|
end = time.perf_counter()
|
|
|
|
|
|
|
|
print(f"gpt_sovits Time to make POST: {end-start}s")
|
|
|
|
if res.status_code != 200:
|
|
|
|
|
|
|
|
print("Error:", res.text)
|
|
|
|
if res.status_code != 200:
|
|
|
|
return
|
|
|
|
print("Error:", res.text)
|
|
|
|
|
|
|
|
return
|
|
|
|
first = True
|
|
|
|
|
|
|
|
for chunk in res.iter_content(chunk_size=16000): # 1280 32K*20ms*2
|
|
|
|
first = True
|
|
|
|
if first:
|
|
|
|
|
|
|
|
end = time.perf_counter()
|
|
|
|
for chunk in res.iter_content(chunk_size=12800): # 1280 32K*20ms*2
|
|
|
|
print(f"gpt_sovits Time to first chunk: {end-start}s")
|
|
|
|
if first:
|
|
|
|
first = False
|
|
|
|
end = time.perf_counter()
|
|
|
|
if chunk and self.state==State.RUNNING:
|
|
|
|
print(f"gpt_sovits Time to first chunk: {end-start}s")
|
|
|
|
yield chunk
|
|
|
|
first = False
|
|
|
|
|
|
|
|
if chunk and self.state==State.RUNNING:
|
|
|
|
print("gpt_sovits response.elapsed:", res.elapsed)
|
|
|
|
yield chunk
|
|
|
|
|
|
|
|
#print("gpt_sovits response.elapsed:", res.elapsed)
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
|
|
|
def stream_tts(self,audio_stream):
|
|
|
|
def stream_tts(self,audio_stream):
|
|
|
|
for chunk in audio_stream:
|
|
|
|
for chunk in audio_stream:
|
|
|
@ -199,26 +206,28 @@ class CosyVoiceTTS(BaseTTS):
|
|
|
|
'tts_text': text,
|
|
|
|
'tts_text': text,
|
|
|
|
'prompt_text': reftext
|
|
|
|
'prompt_text': reftext
|
|
|
|
}
|
|
|
|
}
|
|
|
|
files = [('prompt_wav', ('prompt_wav', open(reffile, 'rb'), 'application/octet-stream'))]
|
|
|
|
try:
|
|
|
|
res = requests.request("GET", f"{server_url}/inference_zero_shot", data=payload, files=files, stream=True)
|
|
|
|
files = [('prompt_wav', ('prompt_wav', open(reffile, 'rb'), 'application/octet-stream'))]
|
|
|
|
|
|
|
|
res = requests.request("GET", f"{server_url}/inference_zero_shot", data=payload, files=files, stream=True)
|
|
|
|
end = time.perf_counter()
|
|
|
|
|
|
|
|
print(f"cosy_voice Time to make POST: {end-start}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if res.status_code != 200:
|
|
|
|
|
|
|
|
print("Error:", res.text)
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
first = True
|
|
|
|
end = time.perf_counter()
|
|
|
|
for chunk in res.iter_content(chunk_size=16000): # 1280 32K*20ms*2
|
|
|
|
print(f"cosy_voice Time to make POST: {end-start}s")
|
|
|
|
if first:
|
|
|
|
|
|
|
|
end = time.perf_counter()
|
|
|
|
if res.status_code != 200:
|
|
|
|
print(f"cosy_voice Time to first chunk: {end-start}s")
|
|
|
|
print("Error:", res.text)
|
|
|
|
first = False
|
|
|
|
return
|
|
|
|
if chunk and self.state==State.RUNNING:
|
|
|
|
|
|
|
|
yield chunk
|
|
|
|
first = True
|
|
|
|
|
|
|
|
|
|
|
|
print("cosy_voice response.elapsed:", res.elapsed)
|
|
|
|
for chunk in res.iter_content(chunk_size=8820): # 882 22.05K*20ms*2
|
|
|
|
|
|
|
|
if first:
|
|
|
|
|
|
|
|
end = time.perf_counter()
|
|
|
|
|
|
|
|
print(f"cosy_voice Time to first chunk: {end-start}s")
|
|
|
|
|
|
|
|
first = False
|
|
|
|
|
|
|
|
if chunk and self.state==State.RUNNING:
|
|
|
|
|
|
|
|
yield chunk
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
|
|
|
def stream_tts(self,audio_stream):
|
|
|
|
def stream_tts(self,audio_stream):
|
|
|
|
for chunk in audio_stream:
|
|
|
|
for chunk in audio_stream:
|
|
|
@ -261,28 +270,30 @@ class XTTS(BaseTTS):
|
|
|
|
speaker["text"] = text
|
|
|
|
speaker["text"] = text
|
|
|
|
speaker["language"] = language
|
|
|
|
speaker["language"] = language
|
|
|
|
speaker["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
|
|
|
|
speaker["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
|
|
|
|
res = requests.post(
|
|
|
|
try:
|
|
|
|
f"{server_url}/tts_stream",
|
|
|
|
res = requests.post(
|
|
|
|
json=speaker,
|
|
|
|
f"{server_url}/tts_stream",
|
|
|
|
stream=True,
|
|
|
|
json=speaker,
|
|
|
|
)
|
|
|
|
stream=True,
|
|
|
|
end = time.perf_counter()
|
|
|
|
)
|
|
|
|
print(f"xtts Time to make POST: {end-start}s")
|
|
|
|
end = time.perf_counter()
|
|
|
|
|
|
|
|
print(f"xtts Time to make POST: {end-start}s")
|
|
|
|
if res.status_code != 200:
|
|
|
|
|
|
|
|
print("Error:", res.text)
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
first = True
|
|
|
|
if res.status_code != 200:
|
|
|
|
for chunk in res.iter_content(chunk_size=960): #24K*20ms*2
|
|
|
|
print("Error:", res.text)
|
|
|
|
if first:
|
|
|
|
return
|
|
|
|
end = time.perf_counter()
|
|
|
|
|
|
|
|
print(f"xtts Time to first chunk: {end-start}s")
|
|
|
|
|
|
|
|
first = False
|
|
|
|
|
|
|
|
if chunk:
|
|
|
|
|
|
|
|
yield chunk
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("xtts response.elapsed:", res.elapsed)
|
|
|
|
first = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for chunk in res.iter_content(chunk_size=9600): #24K*20ms*2
|
|
|
|
|
|
|
|
if first:
|
|
|
|
|
|
|
|
end = time.perf_counter()
|
|
|
|
|
|
|
|
print(f"xtts Time to first chunk: {end-start}s")
|
|
|
|
|
|
|
|
first = False
|
|
|
|
|
|
|
|
if chunk:
|
|
|
|
|
|
|
|
yield chunk
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
|
|
print(e)
|
|
|
|
|
|
|
|
|
|
|
|
def stream_tts(self,audio_stream):
|
|
|
|
def stream_tts(self,audio_stream):
|
|
|
|
for chunk in audio_stream:
|
|
|
|
for chunk in audio_stream:
|
|
|
|