Make Gradio Great Again
Includes a VS2017 download link
Added various dataset checks; a warning now pops up if any dataset is missing
Fixed the bug where warnings popped up erratically
Fixed the bug where only one reference audio could be uploaded for timbre blending
Fixed the import error that occurred when downloading the G2PW Model
Fixed the preprocessing-stage error when training on Windows without an NVIDIA GPU (see the sketch below)
XXXXRT666 12 months ago committed by GitHub
parent 7cfe578968
commit ea56b814bd
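The last fix comes down to the environment-variable guard repeated in the preprocessing hunks below: the old `os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")` assigns `None` whenever the launcher never exported `_CUDA_VISIBLE_DEVICES` (the CPU-only case), and `os.environ` rejects non-string values with a `TypeError`. A minimal standalone sketch of the guarded form, using nothing beyond the standard library:

```python
import os

# Only forward _CUDA_VISIBLE_DEVICES when the launcher actually set it.
# On a machine without an NVIDIA GPU the variable is absent, and assigning
# the None returned by os.environ.get() would raise a TypeError.
if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
```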

@@ -1,5 +1,5 @@
import os, sys
now_dir = os.getcwd()
sys.path.insert(0, now_dir)
from .text.g2pw import G2PWPinyin
from text.g2pw import G2PWPinyin
g2pw = G2PWPinyin(model_dir="GPT_SoVITS/text/G2PWModel",model_source="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",v_to_u=False, neutral_tone_with_five=True)

@@ -695,13 +695,14 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.Row():
inp_ref = gr.Audio(label=i18n("请上传3~10秒内参考音频超过会报错"), type="filepath", scale=13)
with gr.Column(scale=13):
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
ref_text_free = gr.Checkbox(label=i18n("开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True,scale=1)
gr.Markdown(html_left(i18n("使用无参考文本模式时建议使用微调的GPT听不清参考音频说的啥(不晓得写啥)可以开。<br>开启后无视填写的参考文本。")))
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=3, max_lines=3)
prompt_language = gr.Dropdown(
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"), scale=14
)
inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="file_count",scale=13)
prompt_text = gr.Textbox(label=i18n("参考音频的文本"), value="", lines=5, max_lines=5,scale=1)
with gr.Column(scale=14):
prompt_language = gr.Dropdown(
label=i18n("参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文"),
)
inp_refs = gr.File(label=i18n("可选项:通过拖拽多个文件上传多个参考音频(建议同性),平均融合他们的音色。如不填写此项,音色由左侧单个参考音频控制。如是微调模型,建议参考音频全部在微调训练集音色内,底模不用管。"),file_count="multiple")
gr.Markdown(html_center(i18n("*请填写需要合成的目标文本和语种模式"),'h3'))
with gr.Row():
with gr.Column(scale=13):

@@ -7,7 +7,8 @@ inp_wav_dir = os.environ.get("inp_wav_dir")
exp_name = os.environ.get("exp_name")
i_part = os.environ.get("i_part")
all_parts = os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
opt_dir = os.environ.get("opt_dir")
bert_pretrained_dir = os.environ.get("bert_pretrained_dir")
import torch

@@ -6,7 +6,8 @@ inp_wav_dir= os.environ.get("inp_wav_dir")
exp_name= os.environ.get("exp_name")
i_part= os.environ.get("i_part")
all_parts= os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"]= os.environ.get("_CUDA_VISIBLE_DEVICES")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
from feature_extractor import cnhubert
opt_dir= os.environ.get("opt_dir")
cnhubert.cnhubert_base_path= os.environ.get("cnhubert_base_dir")

@@ -4,7 +4,8 @@ inp_text = os.environ.get("inp_text")
exp_name = os.environ.get("exp_name")
i_part = os.environ.get("i_part")
all_parts = os.environ.get("all_parts")
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ.get("_CUDA_VISIBLE_DEVICES")
if "_CUDA_VISIBLE_DEVICES" in os.environ:
os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
opt_dir = os.environ.get("opt_dir")
pretrained_s2G = os.environ.get("pretrained_s2G")
s2config_path = os.environ.get("s2config_path")

@@ -18,7 +18,7 @@ logging.getLogger("matplotlib").setLevel(logging.ERROR)
MATPLOTLIB_FLAG = False
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
logger = logging
@@ -319,13 +319,13 @@ def check_git_hash(model_dir):
def get_logger(model_dir, filename="train.log"):
global logger
logger = logging.getLogger(os.path.basename(model_dir))
logger.setLevel(logging.DEBUG)
logger.setLevel(logging.ERROR)
formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s")
if not os.path.exists(model_dir):
os.makedirs(model_dir)
h = logging.FileHandler(os.path.join(model_dir, filename))
h.setLevel(logging.DEBUG)
h.setLevel(logging.ERROR)
h.setFormatter(formatter)
logger.addHandler(h)
return logger

@@ -24,7 +24,7 @@ A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.<br><br>
2. **Few-shot TTS:** Fine-tune the model with just 1 minute of training data for improved voice similarity and realism.
3. **Cross-lingual Support:** Inference in languages different from the training dataset, currently supporting English, Japanese, and Chinese.
3. **Cross-lingual Support:** Inference in languages different from the training dataset, currently supporting English, Japanese, Korean, Cantonese and Chinese.
4. **WebUI Tools:** Integrated tools include voice accompaniment separation, automatic training set segmentation, Chinese ASR, and text labeling, assisting beginners in creating training datasets and GPT/SoVITS models.
@@ -99,7 +99,7 @@ conda install -c conda-forge 'ffmpeg<7'
Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) and [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) in the GPT-SoVITS root.
Install [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) (Korean TTS Only)
Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only)
##### MacOS Users
```bash

@@ -24,7 +24,7 @@
2. **少样本 TTS** 仅需 1 分钟的训练数据即可微调模型,提升声音相似度和真实感。
3. **跨语言支持:** 支持与训练数据集不同语言的推理,目前支持英语、日语和中文。
3. **跨语言支持:** 支持与训练数据集不同语言的推理,目前支持英语、日语、韩语、粤语和中文。
4. **WebUI 工具:** 集成工具包括声音伴奏分离、自动训练集分割、中文自动语音识别(ASR)和文本标注,协助初学者创建训练数据集和 GPT/SoVITS 模型。
@@ -99,7 +99,7 @@ conda install -c conda-forge 'ffmpeg<7'
下载并将 [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe) 和 [ffprobe.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe) 放置在 GPT-SoVITS 根目录下。
安装 [Visual Studio 2022](https://visualstudio.microsoft.com/zh-hans/downloads/) 环境(仅限韩语TTS)
安装 [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) 环境(仅限韩语TTS)
##### MacOS 用户
```bash

@@ -24,7 +24,7 @@
2. **数ショット TTS:** わずか 1 分間のトレーニングデータでモデルを微調整し、音声の類似性とリアリズムを向上。
3. **多言語サポート:** 現在、英語、日本語、中国語をサポートしています。
3. **多言語サポート:** 現在、英語、日本語、韓語、粵語、中国語をサポートしています。
4. **WebUI ツール:** 統合されたツールには、音声伴奏の分離、トレーニングセットの自動セグメンテーション、中国語 ASR、テキストラベリングが含まれ、初心者がトレーニングデータセットと GPT/SoVITS モデルを作成するのを支援します。

@@ -24,7 +24,7 @@
2. **소량의 데이터 TTS:** 1분의 훈련 데이터만으로 모델을 미세 조정하여 음성 유사도와 실제감을 향상시킬 수 있습니다.
3. **다국어 지원:** 훈련 데이터셋과 다른 언어의 추론을 지원하며, 현재 영어, 일본어, 중국어를 지원합니다.
3. **다국어 지원:** 훈련 데이터셋과 다른 언어의 추론을 지원하며, 현재 영어, 일본어, 중국어, 광둥어, 한국어를 지원합니다.
4. **WebUI 도구:** 음성 반주 분리, 자동 훈련 데이터셋 분할, 중국어 자동 음성 인식(ASR) 및 텍스트 주석 등의 도구를 통합하여 초보자가 훈련 데이터셋과 GPT/SoVITS 모델을 생성하는 데 도움을 줍니다.

@@ -24,7 +24,7 @@ Güçlü Birkaç Örnekli Ses Dönüştürme ve Metinden Konuşmaya Web Arayüz
2. **Birkaç Örnekli Metinden Konuşmaya:** Daha iyi ses benzerliği ve gerçekçiliği için modeli yalnızca 1 dakikalık eğitim verisiyle ince ayarlayın.
3. **Çapraz Dil Desteği:** Eğitim veri setinden farklı dillerde çıkarım, şu anda İngilizce, Japonca ve Çinceyi destekliyor.
3. **Çapraz Dil Desteği:** Eğitim veri setinden farklı dillerde çıkarım, şu anda İngilizce, Japonca, Çince, Kantonca ve Koreceyi destekliyor.
4. **Web Arayüzü Araçları:** Entegre araçlar arasında vokal eşliğinde ayırma, otomatik eğitim seti segmentasyonu, Çince ASR ve metin etiketleme bulunur ve yeni başlayanların eğitim veri setleri ve GPT/SoVITS modelleri oluşturmalarına yardımcı olur.

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Pretrained SSL model path",
"预训练的SoVITS-D模型路径": "Pretrained SoVITS-D model path",
"预训练的SoVITS-G模型路径": "Pretrained SoVITS-G model path",
"预训练的中文BERT模型路径": " Pretrained BERT model path"
"预训练的中文BERT模型路径": " Pretrained BERT model path",
"音频加载失败": "Failed to Load Audio",
"以下文件或文件夹不存在": "No Such File or Folder",
"路径不能为空": "Expected No Empty Path",
"请填入正确的List路径": "Please Fill in the Correct List Path",
"请填入正确的音频文件夹路径": "Please Fill in the Correct Audio Folder Path",
"路径错误": "Path Error",
"缺少音素数据集": "Missing Phoneme Dataset",
"缺少Hubert数据集": "Missing Hubert Dataset",
"缺少音频数据集": "Missing Audio Dataset",
"缺少语义数据集": "Missing Semantics Dataset"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Ruta del modelo SSL preentrenado",
"预训练的SoVITS-D模型路径": "Ruta del modelo SoVITS-D preentrenado",
"预训练的SoVITS-G模型路径": "Ruta del modelo SoVITS-G preentrenado",
"预训练的中文BERT模型路径": "Ruta del modelo BERT en chino preentrenado"
}
"预训练的中文BERT模型路径": "Ruta del modelo BERT en chino preentrenado",
"音频加载失败": "Error al Cargar el Audio",
"以下文件或文件夹不存在": "No Existe Tal Archivo o Carpeta",
"路径不能为空": "Se Espera que la Ruta No Esté Vacía",
"请填入正确的List路径": "Por Favor, Introduzca la Ruta Correcta de la Lista",
"请填入正确的音频文件夹路径": "Por Favor, Introduzca la Ruta Correcta de la Carpeta de Audio",
"路径错误": "Error de Ruta",
"缺少音素数据集": "Falta el Conjunto de Datos de Fonemas",
"缺少Hubert数据集": "Falta el Conjunto de Datos de Hubert",
"缺少音频数据集": "Falta el Conjunto de Datos de Audio",
"缺少语义数据集": "Falta el Conjunto de Datos Semánticos"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Chemin du modèle SSL pré-entraîné",
"预训练的SoVITS-D模型路径": "Chemin du modèle SoVITS-D pré-entraîné",
"预训练的SoVITS-G模型路径": "Chemin du modèle SoVITS-G pré-entraîné",
"预训练的中文BERT模型路径": "Chemin du modèle BERT chinois pré-entraîné"
}
"预训练的中文BERT模型路径": "Chemin du modèle BERT chinois pré-entraîné",
"音频加载失败": "Échec du Chargement de l'Audio",
"以下文件或文件夹不存在": "Aucun Fichier ou Dossier de ce Type",
"路径不能为空": "Chemin Non Vide Attendu",
"请填入正确的List路径": "Veuillez Remplir le Chemin Correct de la Liste",
"请填入正确的音频文件夹路径": "Veuillez Remplir le Chemin Correct du Dossier Audio",
"路径错误": "Erreur de Chemin",
"缺少音素数据集": "Jeu de Données de Phonèmes Manquant",
"缺少Hubert数据集": "Jeu de Données Hubert Manquant",
"缺少音频数据集": "Jeu de Données Audio Manquant",
"缺少语义数据集": "Jeu de Données Sémantiques Manquant"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Percorso del modello SSL preaddestrato",
"预训练的SoVITS-D模型路径": "Percorso del modello preaddestrato SoVITS-D",
"预训练的SoVITS-G模型路径": "Percorso del modello preaddestrato SoVITS-G",
"预训练的中文BERT模型路径": "Percorso del modello BERT cinese preaddestrato"
}
"预训练的中文BERT模型路径": "Percorso del modello BERT cinese preaddestrato",
"音频加载失败": "Caricamento Audio Fallito",
"以下文件或文件夹不存在": "Nessun File o Cartella di Questo Tipo",
"路径不能为空": "Percorso Vuoto Non Consentito",
"请填入正确的List路径": "Si Prega di Inserire il Percorso Corretto della Lista",
"请填入正确的音频文件夹路径": "Si Prega di Inserire il Percorso Corretto della Cartella Audio",
"路径错误": "Errore di Percorso",
"缺少音素数据集": "Dataset di Fonemi Mancante",
"缺少Hubert数据集": "Dataset di Hubert Mancante",
"缺少音频数据集": "Dataset Audio Mancante",
"缺少语义数据集": "Dataset Semantico Mancante"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "事前にトレーニングされたSSLモデルのパス",
"预训练的SoVITS-D模型路径": "事前にトレーニングされたSoVITS-Dモデルのパス",
"预训练的SoVITS-G模型路径": "事前にトレーニングされたSoVITS-Gモデルのパス",
"预训练的中文BERT模型路径": "事前にトレーニングされた中文BERTモデルのパス"
}
"预训练的中文BERT模型路径": "事前にトレーニングされた中文BERTモデルのパス",
"音频加载失败": "音声の読み込みに失敗しました",
"以下文件或文件夹不存在": "そのようなファイルまたはフォルダは存在しません",
"路径不能为空": "空のパスは予期されていません",
"请填入正确的List路径": "正しいリストパスを入力してください",
"请填入正确的音频文件夹路径": "正しいオーディオフォルダパスを入力してください",
"路径错误": "パスエラー",
"缺少音素数据集": "音素データセットが欠落しています",
"缺少Hubert数据集": "Hubertデータセットが欠落しています",
"缺少音频数据集": "オーディオデータセットが欠落しています",
"缺少语义数据集": "セマンティクスデータセットが欠落しています"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "사전 훈련된 SSL 모델 경로",
"预训练的SoVITS-D模型路径": "사전 훈련된 SoVITS-D 모델 경로",
"预训练的SoVITS-G模型路径": "사전 훈련된 SoVITS-G 모델 경로",
"预训练的中文BERT模型路径": "사전 훈련된 중국어 BERT 모델 경로"
}
"预训练的中文BERT模型路径": "사전 훈련된 중국어 BERT 모델 경로",
"音频加载失败": "오디오 로드 실패",
"以下文件或文件夹不存在": "해당 파일이나 폴더가 없습니다",
"路径不能为空": "경로가 비어 있을 수 없습니다",
"请填入正确的List路径": "올바른 리스트 경로를 입력하세요",
"请填入正确的音频文件夹路径": "올바른 오디오 폴더 경로를 입력하세요",
"路径错误": "경로 오류",
"缺少音素数据集": "음소 데이터셋이 없습니다",
"缺少Hubert数据集": "Hubert 데이터셋이 없습니다",
"缺少音频数据集": "오디오 데이터셋이 없습니다",
"缺少语义数据集": "의미론 데이터셋이 없습니다"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Caminho do modelo SSL pre-train",
"预训练的SoVITS-D模型路径": "Caminho do modelo SoVITS-D pre-train",
"预训练的SoVITS-G模型路径": "Caminho do modelo SoVITS-G pre-train",
"预训练的中文BERT模型路径": "Caminho do modelo BERT chinês pre-train"
}
"预训练的中文BERT模型路径": "Caminho do modelo BERT chinês pre-train",
"音频加载失败": "Falha ao Carregar o Áudio",
"以下文件或文件夹不存在": "Nenhum Arquivo ou Pasta Encontrado",
"路径不能为空": "Esperado Caminho Não Vazio",
"请填入正确的List路径": "Por Favor, Insira o Caminho Correto da Lista",
"请填入正确的音频文件夹路径": "Por Favor, Insira o Caminho Correto da Pasta de Áudio",
"路径错误": "Erro de Caminho",
"缺少音素数据集": "Conjunto de Dados de Fonemas Ausente",
"缺少Hubert数据集": "Conjunto de Dados Hubert Ausente",
"缺少音频数据集": "Conjunto de Dados de Áudio Ausente",
"缺少语义数据集": "Conjunto de Dados Semânticos Ausente"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Путь к предварительно обученной модели SSL",
"预训练的SoVITS-D模型路径": "Путь к предварительно обученной модели SoVITS-D",
"预训练的SoVITS-G模型路径": "Путь к предварительно обученной модели SoVITS-G",
"预训练的中文BERT模型路径": "Путь к предварительно обученной китайской модели BERT"
}
"预训练的中文BERT模型路径": "Путь к предварительно обученной китайской модели BERT",
"音频加载失败": "Не удалось загрузить аудио",
"以下文件或文件夹不存在": "Такого файла или папки не существует",
"路径不能为空": "Ожидается, что путь не будет пустым",
"请填入正确的List路径": "Пожалуйста, укажите правильный путь к списку",
"请填入正确的音频文件夹路径": "Пожалуйста, укажите правильный путь к папке с аудио",
"路径错误": "Ошибка пути",
"缺少音素数据集": "Отсутствует набор данных фонем",
"缺少Hubert数据集": "Отсутствует набор данных Hubert",
"缺少音频数据集": "Отсутствует набор данных аудио",
"缺少语义数据集": "Отсутствует семантический набор данных"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "Ön eğitilmiş SSL model yolu",
"预训练的SoVITS-D模型路径": "Ön eğitilmiş SoVITS-D model yolu",
"预训练的SoVITS-G模型路径": "Ön eğitilmiş SoVITS-G model yolu",
"预训练的中文BERT模型路径": "Ön eğitilmiş Çince BERT model yolu"
}
"预训练的中文BERT模型路径": "Ön eğitilmiş Çince BERT model yolu",
"音频加载失败": "Ses Yüklenemedi",
"以下文件或文件夹不存在": "Böyle Bir Dosya veya Klasör Yok",
"路径不能为空": "Boş Yol Beklenmiyor",
"请填入正确的List路径": "Lütfen Doğru Liste Yolunu Girin",
"请填入正确的音频文件夹路径": "Lütfen Doğru Ses Klasörü Yolunu Girin",
"路径错误": "Yol Hatası",
"缺少音素数据集": "Fonem Veri Seti Eksik",
"缺少Hubert数据集": "Hubert Veri Seti Eksik",
"缺少音频数据集": "Ses Veri Seti Eksik",
"缺少语义数据集": "Anlamsal Veri Seti Eksik"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "预训练的SSL模型路径",
"预训练的SoVITS-D模型路径": "预训练的SoVITS-D模型路径",
"预训练的SoVITS-G模型路径": "预训练的SoVITS-G模型路径",
"预训练的中文BERT模型路径": "预训练的中文BERT模型路径"
"预训练的中文BERT模型路径": "预训练的中文BERT模型路径",
"音频加载失败": "音频加载失败",
"以下文件或文件夹不存在": "以下文件或文件夹不存在",
"路径不能为空": "路径不能为空",
"请填入正确的List路径": "请填入正确的List路径",
"请填入正确的音频文件夹路径": "请填入正确的音频文件夹路径",
"路径错误": "路径错误",
"缺少音素数据集": "缺少音素数据集",
"缺少Hubert数据集": "缺少Hubert数据集",
"缺少音频数据集": "缺少音频数据集",
"缺少语义数据集": "缺少语义数据集"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "預訓練的SSL模型路徑",
"预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑",
"预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑",
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑"
}
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑",
"音频加载失败": "無法加載音頻",
"以下文件或文件夹不存在": "沒有此文件或文件夾",
"路径不能为空": "路徑不應該為空",
"请填入正确的List路径": "請填寫正確的列表路徑",
"请填入正确的音频文件夹路径": "請填寫正確的音頻文件夾路徑",
"路径错误": "路徑錯誤",
"缺少音素数据集": "缺少音素數據集",
"缺少Hubert数据集": "缺少Hubert數據集",
"缺少音频数据集": "缺少音頻數據集",
"缺少语义数据集": "缺少語義數據集"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "預訓練的SSL模型路徑",
"预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑",
"预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑",
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑"
}
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑",
"音频加载失败": "無法加載音頻",
"以下文件或文件夹不存在": "沒有此文件或文件夾",
"路径不能为空": "路徑不應該為空",
"请填入正确的List路径": "請填寫正確的列表路徑",
"请填入正确的音频文件夹路径": "請填寫正確的音頻文件夾路徑",
"路径错误": "路徑錯誤",
"缺少音素数据集": "缺少音素數據集",
"缺少Hubert数据集": "缺少Hubert數據集",
"缺少音频数据集": "缺少音頻數據集",
"缺少语义数据集": "缺少語義數據集"
}

@@ -174,5 +174,15 @@
"预训练的SSL模型路径": "預訓練的SSL模型路徑",
"预训练的SoVITS-D模型路径": "預訓練的SoVITS-D模型路徑",
"预训练的SoVITS-G模型路径": "預訓練的SoVITS-G模型路徑",
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑"
}
"预训练的中文BERT模型路径": "預訓練的中文BERT模型路徑",
"音频加载失败": "無法加載音頻",
"以下文件或文件夹不存在": "沒有此文件或文件夾",
"路径不能为空": "路徑不應該為空",
"请填入正确的List路径": "請填寫正確的列表路徑",
"请填入正确的音频文件夹路径": "請填寫正確的音頻文件夾路徑",
"路径错误": "路徑錯誤",
"缺少音素数据集": "缺少音素數據集",
"缺少Hubert数据集": "缺少Hubert數據集",
"缺少音频数据集": "缺少音頻數據集",
"缺少语义数据集": "缺少語義數據集"
}

@@ -1,7 +1,10 @@
import platform,os,traceback
import ffmpeg
import numpy as np
import gradio as gr
from tools.i18n.i18n import I18nAuto
import pandas as pd
i18n = I18nAuto(language=os.environ.get('language','Auto'))
def load_audio(file, sr):
try:
@@ -20,7 +23,7 @@ def load_audio(file, sr):
)
except Exception as e:
traceback.print_exc()
raise RuntimeError(f"Failed to load audio: {e}")
raise RuntimeError(i18n("音频加载失败"))
return np.frombuffer(out, np.float32).flatten()
@@ -30,3 +33,83 @@ def clean_path(path_str:str):
return clean_path(path_str[0:-1])
path_str = path_str.replace('/', os.sep).replace('\\', os.sep)
return path_str.strip(" ").strip('\'').strip("\n").strip('"').strip(" ").strip("\u202a")
def check_for_existance(file_list:list=None,is_train=False,is_dataset_processing=False):
files_status=[]
if is_train == True and file_list:
file_list.append(os.path.join(file_list[0],'2-name2text.txt'))
file_list.append(os.path.join(file_list[0],'3-bert'))
file_list.append(os.path.join(file_list[0],'4-cnhubert'))
file_list.append(os.path.join(file_list[0],'5-wav32k'))
file_list.append(os.path.join(file_list[0],'6-name2semantic.tsv'))
for file in file_list:
if os.path.exists(file):files_status.append(True)
else:files_status.append(False)
if sum(files_status)!=len(files_status):
if is_train:
for file,status in zip(file_list,files_status):
if status:pass
else:gr.Warning(file)
gr.Warning(i18n('以下文件或文件夹不存在'))
return False
elif is_dataset_processing:
if files_status[0]:
return True
elif not files_status[0]:
gr.Warning(file_list[0])
elif not files_status[1] and file_list[1]:
gr.Warning(file_list[1])
gr.Warning(i18n('以下文件或文件夹不存在'))
return False
else:
if file_list[0]:
gr.Warning(file_list[0])
gr.Warning(i18n('以下文件或文件夹不存在'))
else:
gr.Warning(i18n('路径不能为空'))
return False
return True
def check_details(path_list=None,is_train=False,is_dataset_processing=False):
if is_dataset_processing:
list_path, audio_path = path_list
if (not list_path.endswith('.list')):
gr.Warning(i18n('请填入正确的List路径'))
return
if audio_path:
if not os.path.isdir(audio_path):
gr.Warning(i18n('请填入正确的音频文件夹路径'))
return
with open(list_path,"r",encoding="utf8")as f:
line=f.readline().strip("\n").split("\n")
wav_name, _, __, ___ = line[0].split("|")
wav_name=clean_path(wav_name)
if (audio_path != "" and audio_path != None):
wav_name = os.path.basename(wav_name)
wav_path = "%s/%s"%(audio_path, wav_name)
else:
wav_path=wav_name
if os.path.exists(wav_path):
...
else:
gr.Warning(i18n('路径错误'))
return
if is_train:
path_list.append(os.path.join(path_list[0],'2-name2text.txt'))
path_list.append(os.path.join(path_list[0],'4-cnhubert'))
path_list.append(os.path.join(path_list[0],'5-wav32k'))
path_list.append(os.path.join(path_list[0],'6-name2semantic.tsv'))
phone_path, hubert_path, wav_path, semantic_path = path_list[1:]
with open(phone_path,'r',encoding='utf-8') as f:
if f.read(1):...
else:gr.Warning(i18n('缺少音素数据集'))
if os.listdir(hubert_path):...
else:gr.Warning(i18n('缺少Hubert数据集'))
if os.listdir(wav_path):...
else:gr.Warning(i18n('缺少音频数据集'))
df = pd.read_csv(
semantic_path, delimiter="\t", encoding="utf-8"
)
if len(df) >= 1:...
else:gr.Warning(i18n('缺少语义数据集'))
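The two helpers above are consumed by the webui.py handlers further down: they first gate on `check_for_existance` and only then run the finer-grained `check_details`. A minimal usage sketch, assuming it runs inside a Gradio event handler (both helpers report problems via `gr.Warning`) and using a hypothetical experiment directory:

```python
from tools.my_utils import check_for_existance, check_details

exp_dir = "logs/xxx"  # hypothetical experiment directory under exp_root
if check_for_existance([exp_dir], is_train=True):
    # Only inspect dataset contents once the expected files and folders exist.
    check_details([exp_dir], is_train=True)
```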

@@ -25,6 +25,16 @@ is_half=eval(sys.argv[2])
webui_port_uvr5=int(sys.argv[3])
is_share=eval(sys.argv[4])
def html_left(text, label='p'):
return f"""<div style="text-align: left; margin: 0; padding: 0;">
<{label} style="margin: 0; padding: 0;">{text}</{label}>
</div>"""
def html_center(text, label='p'):
return f"""<div style="text-align: center; margin: 100; padding: 50;">
<{label} style="margin: 0; padding: 0;">{text}</{label}>
</div>"""
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
infos = []
try:
@@ -116,11 +126,11 @@ with gr.Blocks(title="UVR5 WebUI") as app:
value=
i18n("本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.")
)
with gr.Tabs():
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
with gr.Group():
with gr.Group():
gr.Markdown(html_center(i18n("伴奏人声分离&去混响&去回声"),'h2'))
with gr.Group():
gr.Markdown(
value=i18n("人声伴奏分离批量处理, 使用UVR5模型。") + "<br>" + \
value=html_left(i18n("人声伴奏分离批量处理, 使用UVR5模型。") + "<br>" + \
i18n("合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。")+ "<br>" + \
i18n("模型分为三类:") + "<br>" + \
i18n("1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点") + "<br>" + \
@@ -131,10 +141,11 @@ with gr.Blocks(title="UVR5 WebUI") as app:
i18n("去混响/去延迟,附:") + "<br>" + \
i18n("1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍") + "<br>" + \
i18n("2、MDX-Net-Dereverb模型挺慢的") + "<br>" + \
i18n("3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。")
i18n("3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。"),'h4')
)
with gr.Row():
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
dir_wav_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径"),
placeholder="C:\\Users\\Desktop\\todo-songs",
@@ -143,7 +154,6 @@ with gr.Blocks(title="UVR5 WebUI") as app:
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
)
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
agg = gr.Slider(
minimum=0,
maximum=20,
@@ -165,8 +175,11 @@ with gr.Blocks(title="UVR5 WebUI") as app:
value="flac",
interactive=True,
)
but2 = gr.Button(i18n("转换"), variant="primary")
vc_output4 = gr.Textbox(label=i18n("输出信息"))
with gr.Column():
with gr.Row():
but2 = gr.Button(i18n("转换"), variant="primary")
with gr.Row():
vc_output4 = gr.Textbox(label=i18n("输出信息"),lines=3)
but2.click(
uvr,
[

@@ -25,6 +25,7 @@ if(os.path.exists(tmp)):
print(str(e))
pass
import site
import traceback
site_packages_roots = []
for path in site.getsitepackages():
if "packages" in path:
@@ -38,14 +39,13 @@ for site_packages_root in site_packages_roots:
try:
with open("%s/users.pth" % (site_packages_root), "w") as f:
f.write(
"%s\n%s/tools\n%s/tools/damo_asr\n%s/GPT_SoVITS\n%s/tools/uvr5"
"%s\n%s/tools\n%s/tools/asr\n%s/GPT_SoVITS\n%s/tools/uvr5"
% (now_dir, now_dir, now_dir, now_dir, now_dir)
)
break
except PermissionError:
pass
except PermissionError as e:
traceback.print_exc()
from tools import my_utils
import traceback
import shutil
import pdb
from subprocess import Popen
@@ -56,11 +56,13 @@ language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else "Auto"
os.environ["language"]=language
i18n = I18nAuto(language=language)
from scipy.io import wavfile
from tools.my_utils import load_audio
from tools.my_utils import load_audio, check_for_existance, check_details
from multiprocessing import cpu_count
# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
import gradio.analytics as analytics
analytics.version_check = lambda:None
try:
import gradio.analytics as analytics
analytics.version_check = lambda:None
except:...
import gradio as gr
n_cpu=cpu_count()
@@ -195,34 +197,35 @@ def kill_process(pid):
kill_proc_tree(pid)
def change_label(if_label,path_list):
def change_label(path_list):
global p_label
if(if_label==True and p_label==None):
if(p_label==None):
check_for_existance([path_list])
path_list=my_utils.clean_path(path_list)
cmd = '"%s" tools/subfix_webui.py --load_list "%s" --webui_port %s --is_share %s'%(python_exec,path_list,webui_port_subfix,is_share)
yield i18n("打标工具WebUI已开启")
yield i18n("打标工具WebUI已开启"), {'__type__':'update','visible':False}, {'__type__':'update','visible':True}
print(cmd)
p_label = Popen(cmd, shell=True)
elif(if_label==False and p_label!=None):
elif(p_label!=None):
kill_process(p_label.pid)
p_label=None
yield i18n("打标工具WebUI已关闭")
yield i18n("打标工具WebUI已关闭"), {'__type__':'update','visible':True}, {'__type__':'update','visible':False}
def change_uvr5(if_uvr5):
def change_uvr5():
global p_uvr5
if(if_uvr5==True and p_uvr5==None):
if(p_uvr5==None):
cmd = '"%s" tools/uvr5/webui.py "%s" %s %s %s'%(python_exec,infer_device,is_half,webui_port_uvr5,is_share)
yield i18n("UVR5已开启")
yield i18n("UVR5已开启"), {'__type__':'update','visible':False}, {'__type__':'update','visible':True}
print(cmd)
p_uvr5 = Popen(cmd, shell=True)
elif(if_uvr5==False and p_uvr5!=None):
elif(p_uvr5!=None):
kill_process(p_uvr5.pid)
p_uvr5=None
yield i18n("UVR5已关闭")
yield i18n("UVR5已关闭"), {'__type__':'update','visible':True}, {'__type__':'update','visible':False}
def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits_path):
def change_tts_inference(bert_path,cnhubert_base_path,gpu_number,gpt_path,sovits_path):
global p_tts_inference
if(if_tts==True and p_tts_inference==None):
if(p_tts_inference==None):
os.environ["gpt_path"]=gpt_path if "/" in gpt_path else "%s/%s"%(GPT_weight_root,gpt_path)
os.environ["sovits_path"]=sovits_path if "/"in sovits_path else "%s/%s"%(SoVITS_weight_root,sovits_path)
os.environ["cnhubert_base_path"]=cnhubert_base_path
@@ -232,13 +235,13 @@ def change_tts_inference(if_tts,bert_path,cnhubert_base_path,gpu_number,gpt_path
os.environ["infer_ttswebui"]=str(webui_port_infer_tts)
os.environ["is_share"]=str(is_share)
cmd = '"%s" GPT_SoVITS/inference_webui.py "%s"'%(python_exec, language)
yield i18n("TTS推理进程已开启")
yield i18n("TTS推理进程已开启"), {'__type__':'update','visible':False}, {'__type__':'update','visible':True}
print(cmd)
p_tts_inference = Popen(cmd, shell=True)
elif(if_tts==False and p_tts_inference!=None):
elif(p_tts_inference!=None):
kill_process(p_tts_inference.pid)
p_tts_inference=None
yield i18n("TTS推理进程已关闭")
yield i18n("TTS推理进程已关闭"), {'__type__':'update','visible':True}, {'__type__':'update','visible':False}
from tools.asr.config import asr_dict
def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_precision):
@@ -246,7 +249,7 @@ def open_asr(asr_inp_dir, asr_opt_dir, asr_model, asr_model_size, asr_lang, asr_
if(p_asr==None):
asr_inp_dir=my_utils.clean_path(asr_inp_dir)
asr_opt_dir=my_utils.clean_path(asr_opt_dir)
check_for_exists([asr_inp_dir])
check_for_existance([asr_inp_dir])
cmd = f'"{python_exec}" tools/asr/{asr_dict[asr_model]["path"]}'
cmd += f' -i "{asr_inp_dir}"'
cmd += f' -o "{asr_opt_dir}"'
@@ -277,7 +280,7 @@ def open_denoise(denoise_inp_dir, denoise_opt_dir):
if(p_denoise==None):
denoise_inp_dir=my_utils.clean_path(denoise_inp_dir)
denoise_opt_dir=my_utils.clean_path(denoise_opt_dir)
check_for_exists([denoise_inp_dir])
check_for_existance([denoise_inp_dir])
cmd = '"%s" tools/cmd-denoise.py -i "%s" -o "%s" -p %s'%(python_exec,denoise_inp_dir,denoise_opt_dir,"float16"if is_half==True else "float32")
yield "语音降噪任务开启:%s"%cmd, {"__type__":"update","visible":False}, {"__type__":"update","visible":True}, {"__type__":"update"}, {"__type__":"update"}
@@ -306,7 +309,8 @@ def open1Ba(batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_s
data=json.loads(data)
s2_dir="%s/%s"%(exp_root,exp_name)
os.makedirs("%s/logs_s2"%(s2_dir),exist_ok=True)
check_for_exists([s2_dir],is_train=True)
if check_for_existance([s2_dir],is_train=True):
check_details([s2_dir],is_train=True)
if(is_half==False):
data["train"]["fp16_run"]=False
batch_size=max(1,batch_size//2)
@@ -353,7 +357,8 @@ def open1Bb(batch_size,total_epoch,exp_name,if_dpo,if_save_latest,if_save_every_
data=yaml.load(data, Loader=yaml.FullLoader)
s1_dir="%s/%s"%(exp_root,exp_name)
os.makedirs("%s/logs_s1"%(s1_dir),exist_ok=True)
check_for_exists([s1_dir],is_train=True)
if check_for_existance([s1_dir],is_train=True):
check_details([s1_dir],is_train=True)
if(is_half==False):
data["train"]["precision"]="32"
batch_size = max(1, batch_size // 2)
@@ -398,7 +403,7 @@ def open_slice(inp,opt_root,threshold,min_length,min_interval,hop_size,max_sil_k
global ps_slice
inp = my_utils.clean_path(inp)
opt_root = my_utils.clean_path(opt_root)
check_for_exists([inp])
check_for_existance([inp])
if(os.path.exists(inp)==False):
yield "输入路径不存在", {"__type__":"update","visible":True}, {"__type__":"update","visible":False}, {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
return
@@ -437,7 +442,8 @@ def open1a(inp_text,inp_wav_dir,exp_name,gpu_numbers,bert_pretrained_dir):
global ps1a
inp_text = my_utils.clean_path(inp_text)
inp_wav_dir = my_utils.clean_path(inp_wav_dir)
check_for_exists([inp_text,inp_wav_dir], is_dataset_processing=True)
if check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True):
check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
if (ps1a == []):
opt_dir="%s/%s"%(exp_root,exp_name)
config={
@@ -499,7 +505,8 @@ def open1b(inp_text,inp_wav_dir,exp_name,gpu_numbers,ssl_pretrained_dir):
global ps1b
inp_text = my_utils.clean_path(inp_text)
inp_wav_dir = my_utils.clean_path(inp_wav_dir)
check_for_exists([inp_text,inp_wav_dir], is_dataset_processing=True)
if check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True):
check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
if (ps1b == []):
config={
"inp_text":inp_text,
@@ -547,7 +554,8 @@ ps1c=[]
def open1c(inp_text,exp_name,gpu_numbers,pretrained_s2G_path):
global ps1c
inp_text = my_utils.clean_path(inp_text)
check_for_exists([inp_text,''], is_dataset_processing=True)
if check_for_existance([inp_text,''], is_dataset_processing=True):
check_details([inp_text,''], is_dataset_processing=True)
if (ps1c == []):
opt_dir="%s/%s"%(exp_root,exp_name)
config={
@@ -606,7 +614,8 @@ def open1abc(inp_text,inp_wav_dir,exp_name,gpu_numbers1a,gpu_numbers1Ba,gpu_numb
global ps1abc
inp_text = my_utils.clean_path(inp_text)
inp_wav_dir = my_utils.clean_path(inp_wav_dir)
check_for_exists([inp_text,inp_wav_dir])
if check_for_existance([inp_text,inp_wav_dir], is_dataset_processing=True):
check_details([inp_text,inp_wav_dir], is_dataset_processing=True)
if (ps1abc == []):
opt_dir="%s/%s"%(exp_root,exp_name)
try:
@@ -743,38 +752,14 @@ def switch_version(version_):
gr.Warning(i18n(f'未下载{version.upper()}模型'))
return {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D")}, {'__type__':'update', 'value':pretrained_gpt_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_gpt_name[-int(version[-1])+2]}, {'__type__':'update', 'value':pretrained_sovits_name[-int(version[-1])+2]}
def check_for_exists(file_list=None,is_train=False,is_dataset_processing=False):
missing_files=[]
if is_train == True and file_list:
file_list.append(os.path.join(file_list[0],'2-name2text.txt'))
file_list.append(os.path.join(file_list[0],'3-bert'))
file_list.append(os.path.join(file_list[0],'4-cnhubert'))
file_list.append(os.path.join(file_list[0],'5-wav32k'))
file_list.append(os.path.join(file_list[0],'6-name2semantic.tsv'))
for file in file_list:
if os.path.exists(file):pass
else:missing_files.append(file)
if missing_files:
if is_train:
for missing_file in missing_files:
if missing_file != '':
gr.Warning(missing_file)
gr.Warning(i18n('以下文件或文件夹不存在:'))
else:
for missing_file in missing_files:
if missing_file != '':
gr.Warning(missing_file)
if file_list[-1]==[''] and is_dataset_processing:
pass
else:
gr.Warning(i18n('以下文件或文件夹不存在:'))
if os.path.exists('GPT_SoVITS/text/G2PWModel'):...
else:
cmd = '"%s" GPT_SoVITS/download.py'%python_exec
p = Popen(cmd, shell=True)
p.wait()
def sync(text):
return {'__type__':'update','value':text}
with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(
value=
@@ -789,8 +774,11 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.TabItem(i18n("0-前置数据集获取工具")):#提前随机切片防止uvr5爆内存->uvr5->slicer->asr->打标
gr.Markdown(value=i18n("0a-UVR5人声伴奏分离&去混响去延迟工具"))
with gr.Row():
if_uvr5 = gr.Checkbox(label=i18n("是否开启UVR5-WebUI"),show_label=True)
uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息"))
with gr.Column(scale=3):
with gr.Row():
uvr5_info = gr.Textbox(label=i18n("UVR5进程输出信息"))
open_uvr5 = gr.Button(value=i18n("开启UVR5-WebUI"),variant="primary",visible=True)
close_uvr5 = gr.Button(value=i18n("关闭UVR5-WebUI"),variant="primary",visible=False)
gr.Markdown(value=i18n("0b-语音切分工具"))
with gr.Row():
with gr.Column(scale=3):
@@ -805,9 +793,9 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
max_sil_kept=gr.Textbox(label=i18n("max_sil_kept:切完后静音最多留多长"),value="500")
with gr.Row():
_max=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("max:归一化后最大值多少"),value=0.9,interactive=True)
alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
alpha=gr.Slider(minimum=0,maximum=1,step=0.05,label=i18n("alpha_mix:混多少比例归一化后音频进来"),value=0.25,interactive=True)
with gr.Row():
n_process=gr.Slider(minimum=1,maximum=n_cpu,step=1,label=i18n("切割使用的进程数"),value=4,interactive=True)
slicer_info = gr.Textbox(label=i18n("语音切割进程输出信息"))
open_slicer_button=gr.Button(i18n("开启语音切割"), variant="primary",visible=True)
close_slicer_button=gr.Button(i18n("终止语音切割"), variant="primary",visible=False)
@@ -890,60 +878,86 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
gr.Markdown(value=i18n("0d-语音文本校对标注工具"))
with gr.Row():
if_label = gr.Checkbox(label=i18n("是否开启打标WebUI"),show_label=True)
path_list = gr.Textbox(
label=i18n(".list标注文件的路径"),
value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list",
interactive=True,
)
label_info = gr.Textbox(label=i18n("打标工具进程输出信息"))
if_label.change(change_label, [if_label,path_list], [label_info])
if_uvr5.change(change_uvr5, [if_uvr5], [uvr5_info])
with gr.Column(scale=3):
with gr.Row():
path_list = gr.Textbox(
label=i18n(".list标注文件的路径"),
value="D:\\RVC1006\\GPT-SoVITS\\raw\\xxx.list",
interactive=True,
)
label_info = gr.Textbox(label=i18n("打标工具进程输出信息"))
open_label = gr.Button(value=i18n("开启打标WebUI"),variant="primary",visible=True)
close_label = gr.Button(value=i18n("关闭打标WebUI"),variant="primary",visible=False)
open_label.click(change_label, [path_list], [label_info,open_label,close_label])
close_label.click(change_label, [path_list], [label_info,open_label,close_label])
open_uvr5.click(change_uvr5, [], [uvr5_info,open_uvr5,close_uvr5])
close_uvr5.click(change_uvr5, [], [uvr5_info,open_uvr5,close_uvr5])
with gr.TabItem(i18n("1-GPT-SoVITS-TTS")):
with gr.Row():
exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True)
gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False)
version_checkbox = gr.Radio(label=i18n("版本"),value=version,choices=['v1','v2'])
pretrained_s2G = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value=pretrained_sovits_name[-int(version[-1])+2], interactive=True)
pretrained_s2D = gr.Textbox(label=i18n("预训练的SoVITS-D模型路径"), value=pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D"), interactive=True)
pretrained_s1 = gr.Textbox(label=i18n("预训练的GPT模型路径"), value=pretrained_gpt_name[-int(version[-1])+2], interactive=True)
with gr.Row():
exp_name = gr.Textbox(label=i18n("*实验/模型名"), value="xxx", interactive=True)
gpu_info = gr.Textbox(label=i18n("显卡信息"), value=gpu_info, visible=True, interactive=False)
version_checkbox = gr.Radio(label=i18n("版本"),value=version,choices=['v1','v2'])
with gr.Row():
pretrained_s2G = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value=pretrained_sovits_name[-int(version[-1])+2], interactive=True, lines=2, max_lines=3,scale=9)
pretrained_s2D = gr.Textbox(label=i18n("预训练的SoVITS-D模型路径"), value=pretrained_sovits_name[-int(version[-1])+2].replace("s2G","s2D"), interactive=True, lines=2, max_lines=3,scale=9)
pretrained_s1 = gr.Textbox(label=i18n("预训练的GPT模型路径"), value=pretrained_gpt_name[-int(version[-1])+2], interactive=True, lines=2, max_lines=3,scale=10)
with gr.TabItem(i18n("1A-训练集格式化工具")):
gr.Markdown(value=i18n("输出logs/实验名目录下应有23456开头的文件和文件夹"))
with gr.Row():
inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True)
inp_wav_dir = gr.Textbox(
label=i18n("*训练集音频文件目录"),
# value=r"D:\RVC1006\GPT-SoVITS\raw\xxx",
interactive=True,
placeholder=i18n("填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名不是全路径。如果留空则使用.list文件里的绝对全路径。")
)
with gr.Row():
inp_text = gr.Textbox(label=i18n("*文本标注文件"),value=r"D:\RVC1006\GPT-SoVITS\raw\xxx.list",interactive=True,scale=10)
with gr.Row():
inp_wav_dir = gr.Textbox(
label=i18n("*训练集音频文件目录"),
# value=r"D:\RVC1006\GPT-SoVITS\raw\xxx",
interactive=True,
placeholder=i18n("填切割后音频所在目录!读取的音频文件完整路径=该目录-拼接-list文件里波形对应的文件名不是全路径。如果留空则使用.list文件里的绝对全路径。"), scale=10
)
gr.Markdown(value=i18n("1Aa-文本内容"))
with gr.Row():
gpu_numbers1a = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
bert_pretrained_dir = gr.Textbox(label=i18n("预训练的中文BERT模型路径"),value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",interactive=False)
button1a_open = gr.Button(i18n("开启文本获取"), variant="primary",visible=True)
button1a_close = gr.Button(i18n("终止文本获取进程"), variant="primary",visible=False)
info1a=gr.Textbox(label=i18n("文本进程输出信息"))
with gr.Row():
gpu_numbers1a = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
with gr.Row():
bert_pretrained_dir = gr.Textbox(label=i18n("预训练的中文BERT模型路径"),value="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",interactive=False,lines=2)
with gr.Row():
button1a_open = gr.Button(i18n("开启文本获取"), variant="primary",visible=True)
button1a_close = gr.Button(i18n("终止文本获取进程"), variant="primary",visible=False)
with gr.Row():
info1a=gr.Textbox(label=i18n("文本进程输出信息"))
gr.Markdown(value=i18n("1Ab-SSL自监督特征提取"))
with gr.Row():
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
cnhubert_base_dir = gr.Textbox(label=i18n("预训练的SSL模型路径"),value="GPT_SoVITS/pretrained_models/chinese-hubert-base",interactive=False)
button1b_open = gr.Button(i18n("开启SSL提取"), variant="primary",visible=True)
button1b_close = gr.Button(i18n("终止SSL提取进程"), variant="primary",visible=False)
info1b=gr.Textbox(label=i18n("SSL进程输出信息"))
with gr.Row():
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
with gr.Row():
cnhubert_base_dir = gr.Textbox(label=i18n("预训练的SSL模型路径"),value="GPT_SoVITS/pretrained_models/chinese-hubert-base",interactive=False,lines=2)
with gr.Row():
button1b_open = gr.Button(i18n("开启SSL提取"), variant="primary",visible=True)
button1b_close = gr.Button(i18n("终止SSL提取进程"), variant="primary",visible=False)
with gr.Row():
info1b=gr.Textbox(label=i18n("SSL进程输出信息"))
gr.Markdown(value=i18n("1Ac-语义token提取"))
with gr.Row():
gpu_numbers1c = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
button1c_open = gr.Button(i18n("开启语义token提取"), variant="primary",visible=True)
button1c_close = gr.Button(i18n("终止语义token提取进程"), variant="primary",visible=False)
info1c=gr.Textbox(label=i18n("语义token提取进程输出信息"))
with gr.Row():
gpu_numbers1c = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"),value="%s-%s"%(gpus,gpus),interactive=True)
with gr.Row():
pretrained_s2G_ = gr.Textbox(label=i18n("预训练的SoVITS-G模型路径"), value=pretrained_sovits_name[-int(version[-1])+2], interactive=False,lines=2)
with gr.Row():
button1c_open = gr.Button(i18n("开启语义token提取"), variant="primary",visible=True)
button1c_close = gr.Button(i18n("终止语义token提取进程"), variant="primary",visible=False)
with gr.Row():
info1c=gr.Textbox(label=i18n("语义token提取进程输出信息"))
gr.Markdown(value=i18n("1Aabc-训练集格式化一键三连"))
with gr.Row():
button1abc_open = gr.Button(i18n("开启一键三连"), variant="primary",visible=True)
button1abc_close = gr.Button(i18n("终止一键三连"), variant="primary",visible=False)
info1abc=gr.Textbox(label=i18n("一键三连进程输出信息"))
with gr.Row():
button1abc_open = gr.Button(i18n("开启一键三连"), variant="primary",visible=True)
button1abc_close = gr.Button(i18n("终止一键三连"), variant="primary",visible=False)
with gr.Row():
info1abc=gr.Textbox(label=i18n("一键三连进程输出信息"))
pretrained_s2G.change(sync,[pretrained_s2G],[pretrained_s2G_])
open_asr_button.click(open_asr, [asr_inp_dir, asr_opt_dir, asr_model, asr_size, asr_lang, asr_precision], [asr_info,open_asr_button,close_asr_button,path_list,inp_text,inp_wav_dir])
close_asr_button.click(close_asr, [], [asr_info,open_asr_button,close_asr_button])
open_slicer_button.click(open_slice, [slice_inp_path,slice_opt_root,threshold,min_length,min_interval,hop_size,max_sil_kept,_max,alpha,n_process], [slicer_info,open_slicer_button,close_slicer_button,asr_inp_dir,denoise_input_dir,inp_wav_dir])
@@ -962,30 +976,46 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.TabItem(i18n("1B-微调训练")):
gr.Markdown(value=i18n("1Ba-SoVITS训练。用于分享的模型文件输出在SoVITS_weights下。"))
with gr.Row():
batch_size = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("总训练轮数total_epoch不建议太高"),value=8,interactive=True)
text_low_lr_rate = gr.Slider(minimum=0.2,maximum=0.6,step=0.05,label=i18n("文本模块学习率权重"),value=0.4,interactive=True)
save_every_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("保存频率save_every_epoch"),value=4,interactive=True)
if_save_latest = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Column():
with gr.Row():
batch_size = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("总训练轮数total_epoch不建议太高"),value=8,interactive=True)
with gr.Row():
text_low_lr_rate = gr.Slider(minimum=0.2,maximum=0.6,step=0.05,label=i18n("文本模块学习率权重"),value=0.4,interactive=True)
save_every_epoch = gr.Slider(minimum=1,maximum=25,step=1,label=i18n("保存频率save_every_epoch"),value=4,interactive=True)
with gr.Column():
with gr.Column():
if_save_latest = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
with gr.Row():
gpu_numbers1Ba = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Row():
button1Ba_open = gr.Button(i18n("开启SoVITS训练"), variant="primary",visible=True)
button1Ba_close = gr.Button(i18n("终止SoVITS训练"), variant="primary",visible=False)
info1Ba=gr.Textbox(label=i18n("SoVITS训练进程输出信息"))
with gr.Row():
button1Ba_open = gr.Button(i18n("开启SoVITS训练"), variant="primary",visible=True)
button1Ba_close = gr.Button(i18n("终止SoVITS训练"), variant="primary",visible=False)
with gr.Row():
info1Ba=gr.Textbox(label=i18n("SoVITS训练进程输出信息"))
gr.Markdown(value=i18n("1Bb-GPT训练。用于分享的模型文件输出在GPT_weights下。"))
with gr.Row():
batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True)
if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True)
if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True)
gpu_numbers1Bb = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Column():
with gr.Row():
batch_size1Bb = gr.Slider(minimum=1,maximum=40,step=1,label=i18n("每张显卡的batch_size"),value=default_batch_size,interactive=True)
total_epoch1Bb = gr.Slider(minimum=2,maximum=50,step=1,label=i18n("总训练轮数total_epoch"),value=15,interactive=True)
with gr.Row():
save_every_epoch1Bb = gr.Slider(minimum=1,maximum=50,step=1,label=i18n("保存频率save_every_epoch"),value=5,interactive=True)
if_dpo = gr.Checkbox(label=i18n("是否开启dpo训练选项(实验性)"), value=False, interactive=True, show_label=True)
with gr.Column():
with gr.Column():
if_save_latest1Bb = gr.Checkbox(label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), value=True, interactive=True, show_label=True)
if_save_every_weights1Bb = gr.Checkbox(label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), value=True, interactive=True, show_label=True)
with gr.Row():
gpu_numbers1Bb = gr.Textbox(label=i18n("GPU卡号以-分割,每个卡号一个进程"), value="%s" % (gpus), interactive=True)
with gr.Row():
button1Bb_open = gr.Button(i18n("开启GPT训练"), variant="primary",visible=True)
button1Bb_close = gr.Button(i18n("终止GPT训练"), variant="primary",visible=False)
info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息"))
with gr.Row():
button1Bb_open = gr.Button(i18n("开启GPT训练"), variant="primary",visible=True)
button1Bb_close = gr.Button(i18n("终止GPT训练"), variant="primary",visible=False)
with gr.Row():
info1Bb=gr.Textbox(label=i18n("GPT训练进程输出信息"))
button1Ba_open.click(open1Ba, [batch_size,total_epoch,exp_name,text_low_lr_rate,if_save_latest,if_save_every_weights,save_every_epoch,gpu_numbers1Ba,pretrained_s2G,pretrained_s2D], [info1Ba,button1Ba_open,button1Ba_close])
button1Ba_close.click(close1Ba, [], [info1Ba,button1Ba_open,button1Ba_close])
button1Bb_open.click(open1Bb, [batch_size1Bb,total_epoch1Bb,exp_name,if_dpo,if_save_latest1Bb,if_save_every_weights1Bb,save_every_epoch1Bb,gpu_numbers1Bb,pretrained_s1], [info1Bb,button1Bb_open,button1Bb_close])
@@ -993,15 +1023,21 @@ with gr.Blocks(title="GPT-SoVITS WebUI") as app:
with gr.TabItem(i18n("1C-推理")):
gr.Markdown(value=i18n("选择训练完存放在SoVITS_weights和GPT_weights下的模型。默认的一个是底模体验5秒Zero Shot TTS用。"))
with gr.Row():
GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name[0],interactive=True)
SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name[0],interactive=True)
gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True)
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
with gr.Row():
GPT_dropdown = gr.Dropdown(label=i18n("*GPT模型列表"), choices=sorted(GPT_names,key=custom_sort_key),value=pretrained_gpt_name[0],interactive=True)
SoVITS_dropdown = gr.Dropdown(label=i18n("*SoVITS模型列表"), choices=sorted(SoVITS_names,key=custom_sort_key),value=pretrained_sovits_name[0],interactive=True)
with gr.Row():
gpu_number_1C=gr.Textbox(label=i18n("GPU卡号,只能填1个整数"), value=gpus, interactive=True)
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
refresh_button.click(fn=change_choices,inputs=[],outputs=[SoVITS_dropdown,GPT_dropdown])
with gr.Row():
if_tts = gr.Checkbox(label=i18n("是否开启TTS推理WebUI"), show_label=True)
tts_info = gr.Textbox(label=i18n("TTS推理WebUI进程输出信息"))
if_tts.change(change_tts_inference, [if_tts,bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info])
with gr.Row():
open_tts = gr.Button(value=i18n("开启TTS推理WebUI"),variant='primary',visible=True)
close_tts = gr.Button(value=i18n("关闭TTS推理WebUI"),variant='primary',visible=False)
with gr.Row():
tts_info = gr.Textbox(label=i18n("TTS推理WebUI进程输出信息"))
open_tts.click(change_tts_inference, [bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info,open_tts,close_tts])
close_tts.click(change_tts_inference, [bert_pretrained_dir,cnhubert_base_dir,gpu_number_1C,GPT_dropdown,SoVITS_dropdown], [tts_info,open_tts,close_tts])
version_checkbox.change(switch_version,[version_checkbox],[pretrained_s2G,pretrained_s2D,pretrained_s1,GPT_dropdown,SoVITS_dropdown])
with gr.TabItem(i18n("2-GPT-SoVITS-变声")):gr.Markdown(value=i18n("施工中,请静候佳音"))
app.queue(concurrency_count=511, max_size=1022).launch(
