Fix Bugs, Modified Layout (#2434)

Co-authored-by: RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
main
XXXXRT666 2 months ago committed by GitHub
parent 7d70852a3f
commit 132f6e7b8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -31,9 +31,11 @@ warnings.simplefilter(action="ignore", category=FutureWarning)
version = model_version = os.environ.get("version", "v2") version = model_version = os.environ.get("version", "v2")
from config import name2sovits_path,name2gpt_path,change_choices,get_weights_names from config import change_choices, get_weights_names, name2gpt_path, name2sovits_path
SoVITS_names, GPT_names = get_weights_names() SoVITS_names, GPT_names = get_weights_names()
from config import pretrained_sovits_name from config import pretrained_sovits_name
path_sovits_v3 = pretrained_sovits_name["v3"] path_sovits_v3 = pretrained_sovits_name["v3"]
path_sovits_v4 = pretrained_sovits_name["v4"] path_sovits_v4 = pretrained_sovits_name["v4"]
is_exist_s2gv3 = os.path.exists(path_sovits_v3) is_exist_s2gv3 = os.path.exists(path_sovits_v3)
@ -108,6 +110,7 @@ from peft import LoraConfig, get_peft_model
from text import cleaned_text_to_sequence from text import cleaned_text_to_sequence
from text.cleaner import clean_text from text.cleaner import clean_text
from tools.assets import css, js, top_html
from tools.i18n.i18n import I18nAuto, scan_language_list from tools.i18n.i18n import I18nAuto, scan_language_list
language = os.environ.get("language", "Auto") language = os.environ.get("language", "Auto")
@ -208,8 +211,11 @@ else:
from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new from process_ckpt import get_sovits_version_from_path_fast, load_sovits_new
v3v4set = {"v3", "v4"} v3v4set = {"v3", "v4"}
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None): def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
if ""in sovits_path:sovits_path=name2sovits_path[sovits_path] if "" in sovits_path:
sovits_path = name2sovits_path[sovits_path]
global vq_model, hps, version, model_version, dict_language, if_lora_v3 global vq_model, hps, version, model_version, dict_language, if_lora_v3
version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path) version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path)
print(sovits_path, version, model_version, if_lora_v3) print(sovits_path, version, model_version, if_lora_v3)
@ -355,7 +361,8 @@ except:
def change_gpt_weights(gpt_path): def change_gpt_weights(gpt_path):
if ""in gpt_path:gpt_path=name2gpt_path[gpt_path] if "" in gpt_path:
gpt_path = name2gpt_path[gpt_path]
global hz, max_sec, t2s_model, config global hz, max_sec, t2s_model, config
hz = 50 hz = 50
dict_s1 = torch.load(gpt_path, map_location="cpu", weights_only=False) dict_s1 = torch.load(gpt_path, map_location="cpu", weights_only=False)
@ -383,6 +390,7 @@ import torch
now_dir = os.getcwd() now_dir = os.getcwd()
def clean_hifigan_model(): def clean_hifigan_model():
global hifigan_model global hifigan_model
if hifigan_model: if hifigan_model:
@ -392,6 +400,8 @@ def clean_hifigan_model():
torch.cuda.empty_cache() torch.cuda.empty_cache()
except: except:
pass pass
def clean_bigvgan_model(): def clean_bigvgan_model():
global bigvgan_model global bigvgan_model
if bigvgan_model: if bigvgan_model:
@ -401,6 +411,8 @@ def clean_bigvgan_model():
torch.cuda.empty_cache() torch.cuda.empty_cache()
except: except:
pass pass
def clean_sv_cn_model(): def clean_sv_cn_model():
global sv_cn_model global sv_cn_model
if sv_cn_model: if sv_cn_model:
@ -411,6 +423,7 @@ def clean_sv_cn_model():
except: except:
pass pass
def init_bigvgan(): def init_bigvgan():
global bigvgan_model, hifigan_model, sv_cn_model global bigvgan_model, hifigan_model, sv_cn_model
from BigVGAN import bigvgan from BigVGAN import bigvgan
@ -429,6 +442,7 @@ def init_bigvgan():
else: else:
bigvgan_model = bigvgan_model.to(device) bigvgan_model = bigvgan_model.to(device)
def init_hifigan(): def init_hifigan():
global hifigan_model, bigvgan_model, sv_cn_model global hifigan_model, bigvgan_model, sv_cn_model
hifigan_model = Generator( hifigan_model = Generator(
@ -445,7 +459,9 @@ def init_hifigan():
hifigan_model.eval() hifigan_model.eval()
hifigan_model.remove_weight_norm() hifigan_model.remove_weight_norm()
state_dict_g = torch.load( state_dict_g = torch.load(
"%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,), map_location="cpu", weights_only=False "%s/GPT_SoVITS/pretrained_models/gsv-v4-pretrained/vocoder.pth" % (now_dir,),
map_location="cpu",
weights_only=False,
) )
print("loading vocoder", hifigan_model.load_state_dict(state_dict_g)) print("loading vocoder", hifigan_model.load_state_dict(state_dict_g))
clean_bigvgan_model() clean_bigvgan_model()
@ -455,7 +471,10 @@ def init_hifigan():
else: else:
hifigan_model = hifigan_model.to(device) hifigan_model = hifigan_model.to(device)
from sv import SV from sv import SV
def init_sv_cn(): def init_sv_cn():
global hifigan_model, bigvgan_model, sv_cn_model global hifigan_model, bigvgan_model, sv_cn_model
sv_cn_model = SV(device, is_half) sv_cn_model = SV(device, is_half)
@ -472,15 +491,16 @@ if model_version in {"v2Pro","v2ProPlus"}:
init_sv_cn() init_sv_cn()
resample_transform_dict = {} resample_transform_dict = {}
def resample(audio_tensor, sr0, sr1, device): def resample(audio_tensor, sr0, sr1, device):
global resample_transform_dict global resample_transform_dict
key = "%s-%s-%s" % (sr0, sr1, str(device)) key = "%s-%s-%s" % (sr0, sr1, str(device))
if key not in resample_transform_dict: if key not in resample_transform_dict:
resample_transform_dict[key] = torchaudio.transforms.Resample( resample_transform_dict[key] = torchaudio.transforms.Resample(sr0, sr1).to(device)
sr0, sr1
).to(device)
return resample_transform_dict[key](audio_tensor) return resample_transform_dict[key](audio_tensor)
def get_spepc(hps, filename, dtype, device, is_v2pro=False): def get_spepc(hps, filename, dtype, device, is_v2pro=False):
# audio = load_audio(filename, int(hps.data.sampling_rate)) # audio = load_audio(filename, int(hps.data.sampling_rate))
@ -491,11 +511,13 @@ def get_spepc(hps, filename,dtype,device,is_v2pro=False):
audio, sr0 = torchaudio.load(filename) audio, sr0 = torchaudio.load(filename)
if sr0 != sr1: if sr0 != sr1:
audio = audio.to(device) audio = audio.to(device)
if(audio.shape[0]==2):audio=audio.mean(0).unsqueeze(0) if audio.shape[0] == 2:
audio = audio.mean(0).unsqueeze(0)
audio = resample(audio, sr0, sr1, device) audio = resample(audio, sr0, sr1, device)
else: else:
audio = audio.to(device) audio = audio.to(device)
if(audio.shape[0]==2):audio=audio.mean(0).unsqueeze(0) if audio.shape[0] == 2:
audio = audio.mean(0).unsqueeze(0)
maxx = audio.abs().max() maxx = audio.abs().max()
if maxx > 1: if maxx > 1:
@ -875,9 +897,13 @@ def get_tts_wav(
if is_v2pro: if is_v2pro:
sv_emb = [sv_cn_model.compute_embedding3(audio_tensor)] sv_emb = [sv_cn_model.compute_embedding3(audio_tensor)]
if is_v2pro: if is_v2pro:
audio = vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers, speed=speed,sv_emb=sv_emb)[0][0] audio = vq_model.decode(
pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers, speed=speed, sv_emb=sv_emb
)[0][0]
else: else:
audio = vq_model.decode(pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers, speed=speed)[0][0] audio = vq_model.decode(
pred_semantic, torch.LongTensor(phones2).to(device).unsqueeze(0), refers, speed=speed
)[0][0]
else: else:
refer, audio_tensor = get_spepc(hps, ref_wav_path, dtype, device) refer, audio_tensor = get_spepc(hps, ref_wav_path, dtype, device)
phoneme_ids0 = torch.LongTensor(phones1).to(device).unsqueeze(0) phoneme_ids0 = torch.LongTensor(phones1).to(device).unsqueeze(0)
@ -1076,6 +1102,7 @@ def process_text(texts):
_text.append(text) _text.append(text)
return _text return _text
def html_center(text, label="p"): def html_center(text, label="p"):
return f"""<div style="text-align: center; margin: 100; padding: 50;"> return f"""<div style="text-align: center; margin: 100; padding: 50;">
<{label} style="margin: 0; padding: 0;">{text}</{label}> <{label} style="margin: 0; padding: 0;">{text}</{label}>
@ -1088,11 +1115,13 @@ def html_left(text, label="p"):
</div>""" </div>"""
with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False) as app: with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app:
gr.Markdown( gr.HTML(
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") top_html.format(
+ "<br>" i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.")
+ i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
),
elem_classes="markdown",
) )
with gr.Group(): with gr.Group():
gr.Markdown(html_center(i18n("模型切换"), "h3")) gr.Markdown(html_center(i18n("模型切换"), "h3"))

@ -47,6 +47,7 @@ import gradio as gr
from TTS_infer_pack.text_segmentation_method import get_method from TTS_infer_pack.text_segmentation_method import get_method
from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config from TTS_infer_pack.TTS import NO_PROMPT_ERROR, TTS, TTS_Config
from tools.assets import css, js, top_html
from tools.i18n.i18n import I18nAuto, scan_language_list from tools.i18n.i18n import I18nAuto, scan_language_list
language = os.environ.get("language", "Auto") language = os.environ.get("language", "Auto")
@ -98,9 +99,11 @@ cut_method = {
i18n("按标点符号切"): "cut5", i18n("按标点符号切"): "cut5",
} }
from config import name2sovits_path,name2gpt_path,change_choices,get_weights_names from config import change_choices, get_weights_names, name2gpt_path, name2sovits_path
SoVITS_names, GPT_names = get_weights_names() SoVITS_names, GPT_names = get_weights_names()
from config import pretrained_sovits_name from config import pretrained_sovits_name
path_sovits_v3 = pretrained_sovits_name["v3"] path_sovits_v3 = pretrained_sovits_name["v3"]
path_sovits_v4 = pretrained_sovits_name["v4"] path_sovits_v4 = pretrained_sovits_name["v4"]
is_exist_s2gv3 = os.path.exists(path_sovits_v3) is_exist_s2gv3 = os.path.exists(path_sovits_v3)
@ -111,10 +114,12 @@ tts_config.device = device
tts_config.is_half = is_half tts_config.is_half = is_half
tts_config.version = version tts_config.version = version
if gpt_path is not None: if gpt_path is not None:
if ""in gpt_path:gpt_path=name2gpt_path[gpt_path] if "" in gpt_path:
gpt_path = name2gpt_path[gpt_path]
tts_config.t2s_weights_path = gpt_path tts_config.t2s_weights_path = gpt_path
if sovits_path is not None: if sovits_path is not None:
if ""in sovits_path:sovits_path=name2sovits_path[sovits_path] if "" in sovits_path:
sovits_path = name2sovits_path[sovits_path]
tts_config.vits_weights_path = sovits_path tts_config.vits_weights_path = sovits_path
if cnhubert_base_path is not None: if cnhubert_base_path is not None:
tts_config.cnhuhbert_base_path = cnhubert_base_path tts_config.cnhuhbert_base_path = cnhubert_base_path
@ -189,6 +194,7 @@ def custom_sort_key(s):
parts = [int(part) if part.isdigit() else part for part in parts] parts = [int(part) if part.isdigit() else part for part in parts]
return parts return parts
if os.path.exists("./weight.json"): if os.path.exists("./weight.json"):
pass pass
else: else:
@ -206,9 +212,13 @@ with open("./weight.json", "r", encoding="utf-8") as file:
sovits_path = sovits_path[0] sovits_path = sovits_path[0]
from process_ckpt import get_sovits_version_from_path_fast from process_ckpt import get_sovits_version_from_path_fast
v3v4set = {"v3", "v4"} v3v4set = {"v3", "v4"}
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None): def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
if ""in sovits_path:sovits_path=name2sovits_path[sovits_path] if "" in sovits_path:
sovits_path = name2sovits_path[sovits_path]
global version, model_version, dict_language, if_lora_v3 global version, model_version, dict_language, if_lora_v3
version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path) version, model_version, if_lora_v3 = get_sovits_version_from_path_fast(sovits_path)
# print(sovits_path,version, model_version, if_lora_v3) # print(sovits_path,version, model_version, if_lora_v3)
@ -273,11 +283,13 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
f.write(json.dumps(data)) f.write(json.dumps(data))
with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False) as app: with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app:
gr.Markdown( gr.HTML(
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") top_html.format(
+ "<br>" i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.")
+ i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
),
elem_classes="markdown",
) )
with gr.Column(): with gr.Column():

@ -0,0 +1,112 @@
# JavaScript source passed to ``gr.Blocks(js=js)`` by the WebUI entry points.
#
# It defines ``createGradioAnimation``, which:
#   1. Reads the ``__theme`` query parameter; when it is not ``'light'`` it
#      sets it to ``'light'`` and assigns ``window.location.search`` (the
#      inline comment says this triggers a page refresh).
#   2. Builds a ``div#gradio-animation`` and schedules one ``<span>`` per
#      character of the welcome text, 250 ms apart, each fading in through an
#      opacity transition.
#   3. Returns the string ``'Animation created'``.
#
# NOTE(review): within this snippet ``container`` is never inserted into the
# document, so the welcome text would not be visible - confirm whether that is
# intentional (only the theme redirect being wanted) or an omission.
js = """
function createGradioAnimation() {
const params = new URLSearchParams(window.location.search);
if (params.get('__theme') !== 'light') {
params.set('__theme', 'light'); // 仅当 __theme 不是 'light' 时设置为 'light'
window.location.search = params.toString(); // 更新 URL触发页面刷新
}
var container = document.createElement('div');
container.id = 'gradio-animation';
container.style.fontSize = '2em';
container.style.fontWeight = '500';
container.style.textAlign = 'center';
container.style.marginBottom = '20px';
container.style.fontFamily = '-apple-system, sans-serif, Arial, Calibri';
var text = 'Welcome to GPT-SoVITS !';
for (var i = 0; i < text.length; i++) {
(function(i){
setTimeout(function(){
var letter = document.createElement('span');
letter.style.opacity = '0';
letter.style.transition = 'opacity 0.5s';
letter.innerText = text[i];
container.appendChild(letter);
setTimeout(function() {
letter.style.opacity = '1';
}, 50);
}, i * 250);
})(i);
}
return 'Animation created';
}
"""
# Stylesheet passed to ``gr.Blocks(css=css)`` by the WebUI entry points.
#
# Rules, in order:
#   * ``.markdown``       - light-blue padded box; the top banner is created
#                           with ``elem_classes="markdown"``.
#   * ``.checkbox_info``  - makes checkbox info text follow the block-title
#                           theme variables.
#   * ``::selection``     - text-selection highlight colour.
#   * ``#checkbox_*``     - vertical alignment tweaks for specific checkboxes.
#   * ``footer``          - fixed-height transparent footer with every child
#                           element hidden.
#
# The ``::selection`` declaration previously read
# ``background: #ffc078; !important;``. The semicolon terminated the
# declaration before ``!important``, leaving the flag as an invalid stray
# token, so the rule was never marked important. The flag now sits inside
# the declaration.
css = """
/* CSSStyleRule */
.markdown {
background-color: lightblue;
padding: 10px;
}
.checkbox_info {
color: var(--block-title-text-color) !important;
font-size: var(--block-title-text-size) !important;
font-weight: var(--block-title-text-weight) !important;
height: 22px;
margin-bottom: 8px !important;
}
::selection {
background: #ffc078 !important;
}
#checkbox_train_dpo input[type="checkbox"]{
margin-top: 6px;
}
#checkbox_train_dpo span {
margin-top: 6px;
}
#checkbox_align_train {
padding-top: 18px;
padding-bottom: 18px;
}
#checkbox_align_infer input[type="checkbox"] {
margin-top: 10px;
}
#checkbox_align_infer span {
margin-top: 10px;
}
footer {
height: 50px !important; /* 设置页脚高度 */
background-color: transparent !important; /* 背景透明 */
display: flex;
justify-content: center; /* 居中对齐 */
align-items: center; /* 垂直居中 */
}
footer * {
display: none !important; /* 隐藏所有子元素 */
}
"""
# HTML template for the banner shown at the top of each WebUI page.
#
# The single ``{}`` placeholder is filled with ``str.format`` at the call
# sites (``gr.HTML(top_html.format(<license notice text>), ...)``). Because
# the template goes through ``str.format``, any literal ``{`` or ``}`` added
# here later must be doubled.
#
# Below the notice text is a centred row of four shields.io badge links:
# the GitHub repository, the Chinese docs, the English docs and the LICENSE.
#
# NOTE(review): the Chinese and English docs badges currently link to the
# same URL - confirm whether a separate English documentation link exists.
top_html = """
<div align="center">
<div style="margin-bottom: 20px; font-size: 15px;">{}</div>
<div style="display: flex; gap: 80px; justify-content: center;">
<a href="https://github.com/RVC-Boss/GPT-SoVITS" target="_blank">
<img src="https://img.shields.io/badge/GitHub-GPT--SoVITS-blue.svg?style=for-the-badge&logo=github" style="width: auto; height: 30px;">
</a>
<a href="https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e" target="_blank">
<img src="https://img.shields.io/badge/简体中文-阅读文档-blue?style=for-the-badge&logo=googledocs&logoColor=white" style="width: auto; height: 30px;">
</a>
<a href="https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e" target="_blank">
<img src="https://img.shields.io/badge/English-READ%20DOCS-blue?style=for-the-badge&logo=googledocs&logoColor=white" style="width: auto; height: 30px;">
</a>
<a href="https://github.com/RVC-Boss/GPT-SoVITS/blob/main/LICENSE" target="_blank">
<img src="https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge&logo=opensourceinitiative" style="width: auto; height: 30px;">
</a>
</div>
</div>
"""

@ -60,6 +60,7 @@ import shutil
import subprocess import subprocess
from subprocess import Popen from subprocess import Popen
from tools.assets import css, js, top_html
from tools.i18n.i18n import I18nAuto, scan_language_list from tools.i18n.i18n import I18nAuto, scan_language_list
language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto" language = sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
@ -1299,14 +1300,13 @@ def sync(text):
return {"__type__": "update", "value": text} return {"__type__": "update", "value": text}
with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False) as app: with gr.Blocks(title="GPT-SoVITS WebUI", analytics_enabled=False, js=js, css=css) as app:
gr.Markdown( gr.HTML(
value=i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.") top_html.format(
i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.")
+ i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.") + i18n("如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE.")
+ "<br>" ),
+ i18n("中文教程文档") elem_classes="markdown",
+ ": "
+ "https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e"
) )
with gr.Tabs(): with gr.Tabs():

Loading…
Cancel
Save