From c38b16901978c1db79491e16905ea3a37a7cf686 Mon Sep 17 00:00:00 2001 From: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:50:12 +0800 Subject: [PATCH] =?UTF-8?q?v3sovits=E6=A8=A1=E5=9E=8B=E6=8E=A8=E7=90=86?= =?UTF-8?q?=E6=94=AF=E6=8C=81webui=E4=BC=A0=E8=AF=AD=E9=80=9F=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E8=B0=83=E6=95=B4=E5=90=88=E6=88=90=E8=AF=AD=E9=80=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v3sovits模型推理支持webui传语速参数调整合成语速 --- GPT_SoVITS/inference_webui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 81c1dd7..03b8e34 100644 --- a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -641,7 +641,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, if(len(refers)==0):refers = [get_spepc(hps, ref_wav_path).to(dtype).to(device)] audio = vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers,speed=speed)[0][0]#.cpu().detach().numpy() else: - refer = get_spepc(hps, ref_wav_path).to(device).to(dtype)#######这里要重采样切到32k,因为src是24k的,没有单独的32k的src,所以不能改成2个路径 + refer = get_spepc(hps, ref_wav_path).to(device).to(dtype) phoneme_ids0=torch.LongTensor(phones1).to(device).unsqueeze(0) phoneme_ids1=torch.LongTensor(phones2).to(device).unsqueeze(0) # print(11111111, phoneme_ids0, phoneme_ids1) @@ -666,7 +666,7 @@ def get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, # print("fea_ref",fea_ref,fea_ref.shape) # print("mel2",mel2) mel2=mel2.to(dtype) - fea_todo, ge = vq_model.decode_encp(pred_semantic, phoneme_ids1, refer, ge) + fea_todo, ge = vq_model.decode_encp(pred_semantic, phoneme_ids1, refer, ge,speed) # print("fea_todo",fea_todo) # print("ge",ge.abs().mean()) cfm_resss = []