From 6c468583c5566e5fbb4fb805e4cc89c403e997b8 Mon Sep 17 00:00:00 2001
From: XXXXRT666 <157766680+XXXXRT666@users.noreply.github.com>
Date: Mon, 31 Mar 2025 04:27:12 +0100
Subject: [PATCH] Fix dependency-related issues via requirements update (#2236)
* Update requirements.txt
* Create constraints.txt
* Update README.md
* Update README.md
* Update README.md
* Update README.md
* Update README.md
* pyopenjtalk and onnx fix
* Update requirements.txt
* Update requirements.txt
* Update install.sh
* update shell install.sh
* update docs
* Update Install.sh
* fix bugs
* Update .gitignore
* Update .gitignore
* Update install.sh
* Update install.sh
* Update extra-req.txt
* Update requirements.txt
---
.gitignore | 180 ++++++++++++++++++++++++++++++++++++-
GPT_SoVITS_Inference.ipynb | 99 ++++++++++----------
README.md | 39 ++++++--
colab_webui.ipynb | 71 +++++++--------
docs/cn/README.md | 99 ++++++++++++--------
docs/ja/README.md | 95 ++++++++++++--------
docs/ko/README.md | 37 ++++++--
docs/tr/README.md | 36 ++++++--
extra-req.txt | 1 +
gpt-sovits_kaggle.ipynb | 3 +-
install.sh | 55 +++++++++---
requirements.txt | 10 ++-
12 files changed, 526 insertions(+), 199 deletions(-)
create mode 100644 extra-req.txt
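Most of the churn below serves one change: dependency installation is split into two steps. Packages whose pins would otherwise conflict during resolution (the commit log mentions a pyopenjtalk and onnx fix) move into the new one-line `extra-req.txt`, installed with `--no-deps`; everything else stays in `requirements.txt`. A minimal sketch of the flow this patch wires into the README, both notebooks, and `install.sh` (the contents of `extra-req.txt` are not shown in this section):

```bash
# Step 1: install the packages in extra-req.txt without letting pip touch
# their declared dependencies (--no-deps skips dependency resolution).
pip install -r extra-req.txt --no-deps

# Step 2: install the remaining requirements with normal resolution.
pip install -r requirements.txt
```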
diff --git a/.gitignore b/.gitignore
index e5cedbf..0bb4e0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,5 +18,183 @@ TEMP
weight.json
ffmpeg*
ffprobe*
+cfg.json
+speakers.json
+ref_audios
tools/AP_BWE_main/24kto48k/*
-!tools/AP_BWE_main/24kto48k/readme.txt
\ No newline at end of file
+!tools/AP_BWE_main/24kto48k/readme.txt
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
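The `.gitignore` additions above are the standard GitHub Python template appended after the project-specific entries. A quick way to confirm the new rules behave as intended is `git check-ignore`; the paths below are illustrative:

```bash
# Each ignored path is printed together with the .gitignore rule that
# matched it:
git check-ignore -v __pycache__/module.pyc .venv/pyvenv.cfg cfg.json

# The negated rule re-includes the readme; plain check-ignore prints
# nothing and exits 1 for a path that is not ignored:
git check-ignore tools/AP_BWE_main/24kto48k/readme.txt \
  || echo "readme.txt is not ignored"
```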
diff --git a/GPT_SoVITS_Inference.ipynb b/GPT_SoVITS_Inference.ipynb
index a5b5532..1b8ec64 100644
--- a/GPT_SoVITS_Inference.ipynb
+++ b/GPT_SoVITS_Inference.ipynb
@@ -1,42 +1,37 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "accelerator": "GPU"
- },
"cells": [
{
"cell_type": "markdown",
- "source": [
- "# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)"
- ],
"metadata": {
"id": "himHYZmra7ix"
- }
+ },
+ "source": [
+ "# Credits for bubarino giving me the huggingface import code (感谢 bubarino 给了我 huggingface 导入代码)"
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
"metadata": {
"id": "e9b7iFV3dm1f"
},
+ "outputs": [],
"source": [
"!git clone https://github.com/RVC-Boss/GPT-SoVITS.git\n",
"%cd GPT-SoVITS\n",
"!apt-get update && apt-get install -y --no-install-recommends tzdata ffmpeg libsox-dev parallel aria2 git git-lfs && git lfs install\n",
+ "!pip install -r extra-req.txt --no-deps\n",
"!pip install -r requirements.txt"
- ],
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "0NgxXg5sjv7z"
+ },
+ "outputs": [],
"source": [
"# @title Download pretrained models 下载预训练模型\n",
"!mkdir -p /content/GPT-SoVITS/GPT_SoVITS/pretrained_models\n",
@@ -53,16 +48,16 @@
"!git clone https://huggingface.co/Delik/uvr5_weights\n",
"!git config core.sparseCheckout true\n",
"!mv /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/GPT-SoVITS/* /content/GPT-SoVITS/GPT_SoVITS/pretrained_models/"
- ],
- "metadata": {
- "id": "0NgxXg5sjv7z",
- "cellView": "form"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "cPDEH-9czOJF"
+ },
+ "outputs": [],
"source": [
"#@title Create folder models 创建文件夹模型\n",
"import os\n",
@@ -77,16 +72,16 @@
" print(f\"The folder '{folder_name}' was created successfully! (文件夹'{folder_name}'已成功创建!)\")\n",
"\n",
"print(\"All folders have been created. (所有文件夹均已创建。)\")"
- ],
- "metadata": {
- "cellView": "form",
- "id": "cPDEH-9czOJF"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "vbZY-LnM0tzq"
+ },
+ "outputs": [],
"source": [
"import requests\n",
"import zipfile\n",
@@ -124,29 +119,35 @@
" shutil.move(source_path, destination_path)\n",
"\n",
"print(f'Model downloaded. (模型已下载。)')"
- ],
- "metadata": {
- "cellView": "form",
- "id": "vbZY-LnM0tzq"
- },
- "execution_count": null,
- "outputs": []
+ ]
},
{
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "4oRGUzkrk8C7"
+ },
+ "outputs": [],
"source": [
"# @title launch WebUI 启动WebUI\n",
"!/usr/local/bin/pip install ipykernel\n",
"!sed -i '10s/False/True/' /content/GPT-SoVITS/config.py\n",
"%cd /content/GPT-SoVITS/\n",
"!/usr/local/bin/python webui.py"
- ],
- "metadata": {
- "id": "4oRGUzkrk8C7",
- "cellView": "form"
- },
- "execution_count": null,
- "outputs": []
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
}
- ]
-}
\ No newline at end of file
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
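Beyond adding the `pip install -r extra-req.txt --no-deps` line, the notebook hunks above are a mechanical re-serialization: keys end up alphabetically sorted (`cells` before `metadata`, `execution_count` and `outputs` before `source`) and a trailing newline is added, which is consistent with an `nbformat` round-trip. A sketch of reproducing that normalization, assuming the `nbformat` package is available:

```bash
python - <<'EOF'
import nbformat

# Round-trip the notebook: nbformat validates it against the v4 schema and
# writes JSON with sorted keys, matching the reordering seen in this diff.
nb = nbformat.read("GPT_SoVITS_Inference.ipynb", as_version=4)
nbformat.validate(nb)
nbformat.write(nb, "GPT_SoVITS_Inference.ipynb")
EOF
```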
diff --git a/README.md b/README.md
index 04bfff2..8be71a6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,5 @@
-
GPT-SoVITS-WebUI
A Powerful Few-shot Voice Conversion and Text-to-Speech WebUI.
@@ -77,6 +76,7 @@ bash install.sh
```bash
conda create -n GPTSoVits python=3.9
conda activate GPTSoVits
+pip install -r extra-req.txt --no-deps
pip install -r requirements.txt
```
@@ -105,6 +105,7 @@ Download and place [ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWeb
Install [Visual Studio 2017](https://aka.ms/vs/17/release/vc_redist.x86.exe) (Korean TTS Only)
##### MacOS Users
+
```bash
brew install ffmpeg
```
@@ -112,6 +113,7 @@ brew install ffmpeg
#### Install Dependencies
```bash
+pip install -r extra-req.txt --no-deps
pip install -r requirements.txt
```
@@ -150,9 +152,9 @@ docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-Docker
3. For UVR5 (Vocals/Accompaniment Separation & Reverberation Removal, additionally), download models from [UVR5 Weights](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/uvr5_weights) and place them in `tools/uvr5/uvr5_weights`.
- - If you want to use `bs_roformer` or `mel_band_roformer` models for UVR5, you can manually download the model and corresponding configuration file, and put them in `tools/uvr5/uvr5_weights`. **Rename the model file and configuration file, ensure that the model and configuration files have the same and corresponding names except for the suffix**. In addition, the model and configuration file names **must include `roformer`** in order to be recognized as models of the roformer class.
+ - If you want to use `bs_roformer` or `mel_band_roformer` models for UVR5, you can manually download the model and corresponding configuration file, and put them in `tools/uvr5/uvr5_weights`. **Rename the model file and configuration file so that they share the same name apart from the suffix**. In addition, the model and configuration file names **must include `roformer`** in order to be recognized as roformer-class models.
- - The suggestion is to **directly specify the model type** in the model name and configuration file name, such as `mel_mand_roformer`, `bs_roformer`. If not specified, the features will be compared from the configuration file to determine which type of model it is. For example, the model `bs_roformer_ep_368_sdr_12.9628.ckpt` and its corresponding configuration file `bs_roformer_ep_368_sdr_12.9628.yaml` are a pair, `kim_mel_band_roformer.ckpt` and `kim_mel_band_roformer.yaml` are also a pair.
+ - It is recommended to **directly specify the model type** in the model name and configuration file name, such as `mel_band_roformer` or `bs_roformer`. If not specified, features from the configuration file will be compared to determine the model type. For example, the model `bs_roformer_ep_368_sdr_12.9628.ckpt` and its corresponding configuration file `bs_roformer_ep_368_sdr_12.9628.yaml` are a pair, as are `kim_mel_band_roformer.ckpt` and `kim_mel_band_roformer.yaml`.
4. For Chinese ASR (additionally), download models from [Damo ASR Model](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/files), [Damo VAD Model](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/files), and [Damo Punc Model](https://modelscope.cn/models/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/files) and place them in `tools/asr/models`.
@@ -200,6 +202,7 @@ if you want to switch to V1, then
```bash
python webui.py v1
```
+
Or manually switch version in WebUI
### Finetune
@@ -217,18 +220,20 @@ Or manually switch version in WebUI
#### Integrated Package Users
-Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1` ,then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
+Double-click `go-webui-v2.bat` or use `go-webui-v2.ps1`, then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
#### Others
```bash
python GPT_SoVITS/inference_webui.py
```
+
OR
```bash
python webui.py
```
+
then open the inference webui at `1-GPT-SoVITS-TTS/1C-inference`
## V2 Release Notes
@@ -243,7 +248,7 @@ New Features:
4. Improved synthesis quality for low-quality reference audio
- [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v2%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
Use v2 from v1 environment:
@@ -253,7 +258,7 @@ Use v2 from v1 environment:
3. Download v2 pretrained models from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main/gsv-v2final-pretrained) and put them into `GPT_SoVITS\pretrained_models\gsv-v2final-pretrained`.
- Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip)(Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.
+ Chinese v2 additional: [G2PWModel_1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.1.zip) (Download G2PW models, unzip and rename to `G2PWModel`, and then place them in `GPT_SoVITS/text`.)
## V3 Release Notes
@@ -263,7 +268,7 @@ New Features:
2. GPT model is more stable, with fewer repetitions and omissions, and it is easier to generate speech with richer emotional expression.
- [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
+ [more details](https://github.com/RVC-Boss/GPT-SoVITS/wiki/GPT%E2%80%90SoVITS%E2%80%90v3%E2%80%90features-(%E6%96%B0%E7%89%B9%E6%80%A7))
Use v3 from v2 environment:
@@ -273,8 +278,7 @@ Use v3 from v2 environment:
3. Download v3 pretrained models (s1v3.ckpt, s2Gv3.pth and models--nvidia--bigvgan_v2_24khz_100band_256x folder) from [huggingface](https://huggingface.co/lj1995/GPT-SoVITS/tree/main) and put them into `GPT_SoVITS\pretrained_models`.
- additional: for Audio Super Resolution model, you can read [how to download](./tools/AP_BWE_main/24kto48k/readme.txt)
-
+ additional: for the Audio Super Resolution model, see [how to download](./tools/AP_BWE_main/24kto48k/readme.txt)
## Todo List
@@ -297,15 +301,20 @@ Use v3 from v2 environment:
- [ ] model mix
## (Additional) Method for running from the command line
+
Use the command line to open the WebUI for UVR5
+
```
python tools/uvr5/webui.py ""
```
+
+
This is how the audio segmentation of the dataset is done using the command line
+
```
python audio_slicer.py \
--input_path "" \
@@ -315,16 +324,21 @@ python audio_slicer.py \
--min_interval
--hop_size
```
+
This is how dataset ASR processing is done using the command line (Chinese only)
+
```
python tools/asr/funasr_asr.py -i -o