diff --git a/README.md b/README.md index c67a7a0..9b124f5 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ pip install "git+https://github.com/facebookresearch/pytorch3d.git" pip install tensorflow-gpu==2.8.0 pip install --upgrade "protobuf<=3.20.1" ``` -如果用pytorch2.1,torchvision用0.16(可以去torchvision官网根据pytorch版本找匹配的),cudatoolkit可以不用装 +如果用pytorch2.1,torchvision用0.16(可以去[torchvision官网](https://github.com/pytorch/vision)根据pytorch版本找匹配的),cudatoolkit可以不用装 安装常见问题[FAQ](/assets/faq.md) linux cuda环境搭建可以参考这篇文章 https://zhuanlan.zhihu.com/p/674972886 diff --git a/ttsreal.py b/ttsreal.py index 665ff46..b4da862 100644 --- a/ttsreal.py +++ b/ttsreal.py @@ -138,7 +138,7 @@ class VoitsTTS(BaseTTS): 'ref_audio_path':reffile, 'prompt_text':reftext, 'prompt_lang':language, - 'media_type':'raw', + 'media_type':'ogg', 'streaming_mode':True } # req["text"] = text @@ -162,7 +162,8 @@ class VoitsTTS(BaseTTS): first = True - for chunk in res.iter_content(chunk_size=12800): # 1280 32K*20ms*2 + for chunk in res.iter_content(chunk_size=None): #12800 1280 32K*20ms*2 + print('chunk len:',len(chunk)) if first: end = time.perf_counter() print(f"gpt_sovits Time to first chunk: {end-start}s") @@ -173,13 +174,29 @@ class VoitsTTS(BaseTTS): except Exception as e: print(e) + def __create_bytes_stream(self,byte_stream): + #byte_stream=BytesIO(buffer) + stream, sample_rate = sf.read(byte_stream) # [T*sample_rate,] float64 + print(f'[INFO]tts audio stream {sample_rate}: {stream.shape}') + stream = stream.astype(np.float32) + + if stream.ndim > 1: + print(f'[WARN] audio has {stream.shape[1]} channels, only use the first.') + stream = stream[:, 0] + + if sample_rate != self.sample_rate and stream.shape[0]>0: + print(f'[WARN] audio sample rate is {sample_rate}, resampling into {self.sample_rate}.') + stream = resampy.resample(x=stream, sr_orig=sample_rate, sr_new=self.sample_rate) + + return stream + def stream_tts(self,audio_stream): for chunk in audio_stream: if chunk is not None and len(chunk)>0: - stream = np.frombuffer(chunk, dtype=np.int16).astype(np.float32) / 32767 - stream = resampy.resample(x=stream, sr_orig=32000, sr_new=self.sample_rate) - #byte_stream=BytesIO(buffer) - #stream = self.__create_bytes_stream(byte_stream) + #stream = np.frombuffer(chunk, dtype=np.int16).astype(np.float32) / 32767 + #stream = resampy.resample(x=stream, sr_orig=32000, sr_new=self.sample_rate) + byte_stream=BytesIO(chunk) + stream = self.__create_bytes_stream(byte_stream) streamlen = stream.shape[0] idx=0 while streamlen >= self.chunk: