Merge pull request #761 from Lion-Wu/fix/cpu

Support CPU training, use CPU on macOS
1 year ago · 3dfbcbcd82
parent 3b11cd9814 7822f1961b
commit 3dfbcbcd82
12 changed files with 48 additions and 84 deletions
--- a/GPT_SoVITS/inference_webui.py
+++ b/GPT_SoVITS/inference_webui.py
@ -49,7 +49,7 @@ is_share = os.environ.get("is_share", "False")
 is_share = eval(is_share)
 if "_CUDA_VISIBLE_DEVICES" in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = os.environ["_CUDA_VISIBLE_DEVICES"]
-is_half = eval(os.environ.get("is_half", "True")) and not torch.backends.mps.is_available()
+is_half = eval(os.environ.get("is_half", "True")) and torch.cuda.is_available()
 import gradio as gr
 from transformers import AutoModelForMaskedLM, AutoTokenizer
 import numpy as np
@ -69,7 +69,7 @@ from tools.i18n.i18n import I18nAuto
 i18n = I18nAuto()
-os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
+# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 确保直接启动推理UI时也能够设置。
 if torch.cuda.is_available():
    device = "cuda"
--- a/GPT_SoVITS/prepare_datasets/1-get-text.py
+++ b/GPT_SoVITS/prepare_datasets/1-get-text.py
@ -49,8 +49,8 @@ if os.path.exists(txt_path) == False:
    os.makedirs(bert_dir, exist_ok=True)
    if torch.cuda.is_available():
        device = "cuda:0"
-    elif torch.backends.mps.is_available():
+    # elif torch.backends.mps.is_available():
-        device = "mps"
+    #     device = "mps"
    else:
        device = "cpu"
    tokenizer = AutoTokenizer.from_pretrained(bert_pretrained_dir)
--- a/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
+++ b/GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py
@ -50,8 +50,8 @@ maxx=0.95
 alpha=0.5
 if torch.cuda.is_available():
    device = "cuda:0"
-elif torch.backends.mps.is_available():
+# elif torch.backends.mps.is_available():
-    device = "mps"
+#     device = "mps"
 else:
    device = "cpu"
 model=cnhubert.get_model()
--- a/GPT_SoVITS/prepare_datasets/3-get-semantic.py
+++ b/GPT_SoVITS/prepare_datasets/3-get-semantic.py
@ -40,8 +40,8 @@ if os.path.exists(semantic_path) == False:
    if torch.cuda.is_available():
        device = "cuda"
-    elif torch.backends.mps.is_available():
+    # elif torch.backends.mps.is_available():
-        device = "mps"
+    #     device = "mps"
    else:
        device = "cpu"
    hps = utils.get_hparams_from_file(s2config_path)
--- a/GPT_SoVITS/s1_train.py
+++ b/GPT_SoVITS/s1_train.py
@ -118,16 +118,16 @@ def main(args):
    os.environ["MASTER_ADDR"]="localhost"
    trainer: Trainer = Trainer(
        max_epochs=config["train"]["epochs"],
-        accelerator="gpu",
+        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        # val_check_interval=9999999999999999999999,###不要验证
        # check_val_every_n_epoch=None,
        limit_val_batches=0,
-        devices=-1,
+        devices=-1 if torch.cuda.is_available() else 1,
        benchmark=False,
        fast_dev_run=False,
-        strategy = "auto" if torch.backends.mps.is_available() else DDPStrategy(
+        strategy = DDPStrategy(
            process_group_backend="nccl" if platform.system() != "Windows" else "gloo"
-        ),  # mps 不支持多节点训练
+        ) if torch.cuda.is_available() else "auto",
        precision=config["train"]["precision"],
        logger=logger,
        num_sanity_val_steps=0,
--- a/GPT_SoVITS/s2_train.py
+++ b/GPT_SoVITS/s2_train.py
@ -41,15 +41,15 @@ torch.set_float32_matmul_precision("medium")  # 最低精度但最快（也就
 # from config import pretrained_s2G,pretrained_s2D
 global_step = 0
 device = "cpu"  # cuda以外的设备，等mps优化后加入
 def main():
    """Assume Single Node Multi GPUs Training Only"""
    assert torch.cuda.is_available() or torch.backends.mps.is_available(), "Only GPU training is allowed."
-    if torch.backends.mps.is_available():
+    if torch.cuda.is_available():
-        n_gpus = 1
-    else:
        n_gpus = torch.cuda.device_count()
+    else:
+        n_gpus = 1
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(randint(20000, 55555))
@ -73,7 +73,7 @@ def run(rank, n_gpus, hps):
        writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
    dist.init_process_group(
-        backend = "gloo" if os.name == "nt" or torch.backends.mps.is_available() else "nccl",
+        backend = "gloo" if os.name == "nt" or not torch.cuda.is_available() else "nccl",
        init_method="env://",
        world_size=n_gpus,
        rank=rank,
@ -137,9 +137,9 @@ def run(rank, n_gpus, hps):
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model,
-    ).to("mps")
+    ).to(device)
-    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to("mps")
+    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to(device)
    for name, param in net_g.named_parameters():
        if not param.requires_grad:
            print(name, "not requires_grad")
@ -187,8 +187,8 @@ def run(rank, n_gpus, hps):
        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
    else:
-        net_g = net_g.to("mps")
+        net_g = net_g.to(device)
-        net_d = net_d.to("mps")
+        net_d = net_d.to(device)
    try:  # 如果能加载自动resume
        _, _, _, epoch_str = utils.load_checkpoint(
@ -320,12 +320,12 @@ def train_and_evaluate(
                rank, non_blocking=True
            )
        else:
-            spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
+            spec, spec_lengths = spec.to(device), spec_lengths.to(device)
-            y, y_lengths = y.to("mps"), y_lengths.to("mps")
+            y, y_lengths = y.to(device), y_lengths.to(device)
-            ssl = ssl.to("mps")
+            ssl = ssl.to(device)
            ssl.requires_grad = False
            # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
-            text, text_lengths = text.to("mps"), text_lengths.to("mps")
+            text, text_lengths = text.to(device), text_lengths.to(device)
        with autocast(enabled=hps.train.fp16_run):
            (
@ -532,10 +532,10 @@ def evaluate(hps, generator, eval_loader, writer_eval):
                ssl = ssl.cuda()
                text, text_lengths = text.cuda(), text_lengths.cuda()
            else:
-                spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
+                spec, spec_lengths = spec.to(device), spec_lengths.to(device)
-                y, y_lengths = y.to("mps"), y_lengths.to("mps")
+                y, y_lengths = y.to(device), y_lengths.to(device)
-                ssl = ssl.to("mps")
+                ssl = ssl.to(device)
-                text, text_lengths = text.to("mps"), text_lengths.to("mps")
+                text, text_lengths = text.to(device), text_lengths.to(device)
            for test in [0, 1]:
                y_hat, mask, *_ = generator.module.infer(
                    ssl, spec, spec_lengths, text, text_lengths, test=test
--- a/README.md
+++ b/README.md
@ -33,7 +33,7 @@ Unseen speakers few-shot fine-tuning demo:
 https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
-[教程中文版](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) [User guide (EN)](https://rentry.co/GPT-SoVITS-guide#/)
+**User guide: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
 ## Installation
@ -61,13 +61,7 @@ bash install.sh
 ### macOS
-Only Macs that meet the following conditions can train models:
+**Note: The models trained with GPUs on Macs result in significantly lower quality compared to those trained on other devices, so we are temporarily using CPUs instead.**
-- Mac computers with Apple silicon
-- macOS 12.3 or later
-- Xcode command-line tools installed by running `xcode-select --install`
-**All Macs can do inference with CPU, which has been demonstrated to outperform GPU inference.**
 First make sure you have installed FFmpeg by running `brew install ffmpeg` or `conda install ffmpeg`, then install by using the following commands:
@ -75,12 +69,9 @@ First make sure you have installed FFmpeg by running `brew install ffmpeg` or `c
 conda create -n GPTSoVits python=3.9
 conda activate GPTSoVits
-pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
 pip install -r requirements.txt
 ```
-_Note: Training models will only work if you've installed PyTorch Nightly._
 ### Install Manually
 #### Install Dependences
--- a/api.py
+++ b/api.py
@ -13,7 +13,7 @@
 `-dt` - `默认参考音频文本`
 `-dl` - `默认参考音频语种, "中文","英文","日文","zh","en","ja"`
-`-d` - `推理设备, "cuda","cpu","mps"`
+`-d` - `推理设备, "cuda","cpu"`
 `-a` - `绑定地址, 默认"127.0.0.1"`
 `-p` - `绑定端口, 默认9880, 可在 config.py 中指定`
 `-fp` - `覆盖 config.py 使用全精度`
@ -143,7 +143,7 @@ parser.add_argument("-dr", "--default_refer_path", type=str, default="", help="
 parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本")
 parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种")
-parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu / mps")
+parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu")
 parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0")
 parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880")
 parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度")
@ -482,9 +482,6 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language):
    wav.seek(0)
    torch.cuda.empty_cache()
-    if device == "mps":
-        print('executed torch.mps.empty_cache()')
-        torch.mps.empty_cache()
    return StreamingResponse(wav, media_type="audio/wav")
--- a/docs/cn/README.md
+++ b/docs/cn/README.md
@ -33,6 +33,8 @@
 https://github.com/RVC-Boss/GPT-SoVITS/assets/129054828/05bee1fa-bdd8-4d85-9350-80c060ab47fb
+**用户手册: [简体中文](https://www.yuque.com/baicaigongchang1145haoyuangong/ib3g1e) | [English](https://rentry.co/GPT-SoVITS-guide#/)**
 ## 安装
 中国地区用户可[点击此处](https://www.codewithgpu.com/i/RVC-Boss/GPT-SoVITS/GPT-SoVITS-Official)使用 AutoDL 云端镜像进行体验。
@ -59,13 +61,7 @@ bash install.sh
 ### macOS
-只有符合以下条件的 Mac 可以训练模型：
+**注：在 Mac 上使用 GPU 训练的模型效果显著低于其他设备训练的模型，所以我们暂时使用CPU进行训练。**
-- 搭载 Apple 芯片的 Mac
-- 运行macOS 12.3 或更高版本
-- 已通过运行`xcode-select --install`安装 Xcode command-line tools
-**所有 Mac 都可使用 CPU 进行推理，且已测试性能优于 GPU。**
 首先确保你已通过运行 `brew install ffmpeg` 或 `conda install ffmpeg` 安装 FFmpeg，然后运行以下命令安装：
@ -73,12 +69,9 @@ bash install.sh
 conda create -n GPTSoVits python=3.9
 conda activate GPTSoVits
-pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
 pip install -r requirements.txt
 ```
-_注：只有安装了Pytorch Nightly才可训练模型。_
 ### 手动安装
 #### 安装依赖
--- a/docs/ja/README.md
+++ b/docs/ja/README.md
@ -57,13 +57,7 @@ bash install.sh
 ### macOS
-モデルをトレーニングできるMacは、以下の条件を満たす必要があります：
+**注：MacでGPUを使用して訓練されたモデルは、他のデバイスで訓練されたモデルと比較して著しく品質が低下するため、当面はCPUを使用して訓練します。**
-- Appleシリコンを搭載したMacコンピュータ
-- macOS 12.3以降
-- `xcode-select --install`を実行してインストールされたXcodeコマンドラインツール
-**すべてのMacはCPUを使用して推論を行うことができ、GPU推論よりも優れていることが実証されています。**
 まず、`brew install ffmpeg`または`conda install ffmpeg`を実行してFFmpegをインストールしたことを確認してください。次に、以下のコマンドを使用してインストールします：
@ -71,12 +65,9 @@ bash install.sh
 conda create -n GPTSoVits python=3.9
 conda activate GPTSoVits
-pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
 pip install -r requirements.txt
 ```
-_注：PyTorch Nightlyをインストールした場合にのみ、モデルのトレーニングが可能です。_
 ### 手動インストール
 #### 依存関係をインストールします
--- a/docs/ko/README.md
+++ b/docs/ko/README.md
@ -57,13 +57,7 @@ bash install.sh
 ### macOS
-다음 조건을 충족하는 Mac에서만 모델을 훈련할 수 있습니다:
+**주의: Mac에서 GPU로 훈련된 모델은 다른 장치에서 훈련된 모델에 비해 현저히 낮은 품질을 나타내므로, 우리는 일시적으로 CPU를 사용하여 훈련하고 있습니다.**
-- Apple 실리콘을 탑재한 Mac
-- macOS 12.3 이상 버전
-- `xcode-select --install`을 실행하여 Xcode 명령줄 도구가 설치됨
-**모든 Mac은 CPU를 사용하여 추론할 수 있으며, GPU 추론보다 우수한 성능을 보여주었습니다.**
 먼저 `brew install ffmpeg` 또는 `conda install ffmpeg`를 실행하여 FFmpeg가 설치되었는지 확인한 다음, 다음 명령어를 사용하여 설치하세요:
@ -71,12 +65,9 @@ bash install.sh
 conda create -n GPTSoVits python=3.9
 conda activate GPTSoVits
-pip3 install --pre torch torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
 pip install -r requirements.txt
 ```
-_참고: PyTorch Nightly가 설치되어야만 모델을 훈련할 수 있습니다._
 ### 수동 설치
 #### 의존성 설치
--- a/webui.py
+++ b/webui.py
@ -55,7 +55,7 @@ from scipy.io import wavfile
 from tools.my_utils import load_audio
 from multiprocessing import cpu_count
-os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
+# os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 当遇到mps不支持的步骤时使用cpu
 n_cpu=cpu_count()
@ -73,18 +73,19 @@ if torch.cuda.is_available() or ngpu != 0:
            if_gpu_ok = True  # 至少有一张能用的N卡
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            mem.append(int(torch.cuda.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4))
-# 判断是否支持mps加速
+# # 判断是否支持mps加速
-if torch.backends.mps.is_available():
+# if torch.backends.mps.is_available():
-    if_gpu_ok = True
+#     if_gpu_ok = True
-    gpu_infos.append("%s\t%s" % ("0", "Apple GPU"))
+#     gpu_infos.append("%s\t%s" % ("0", "Apple GPU"))
-    mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存
+#     mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存
 if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    default_batch_size = min(mem) // 2
 else:
-    gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
+    gpu_info = ("%s\t%s" % ("0", "CPU"))
-    default_batch_size = 1
+    gpu_infos.append("%s\t%s" % ("0", "CPU"))
+    default_batch_size = psutil.virtual_memory().total/ 1024 / 1024 / 1024 / 2
 gpus = "-".join([i[0] for i in gpu_infos])
 pretrained_sovits_name="GPT_SoVITS/pretrained_models/s2G488k.pth"