@@ -44,8 +44,11 @@ global_step = 0
 def main():
     """Assume Single Node Multi GPUs Training Only"""
-    assert torch.cuda.is_available(), "CPU training is not allowed."
+    assert torch.cuda.is_available() or torch.backends.mps.is_available(), "Only GPU training is allowed."
 
-    n_gpus = torch.cuda.device_count()
+    if torch.backends.mps.is_available():
+        n_gpus = 1
+    else:
+        n_gpus = torch.cuda.device_count()
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = str(randint(20000, 55555))
 
@@ -70,12 +73,13 @@ def run(rank, n_gpus, hps):
     writer_eval = SummaryWriter(log_dir=os.path.join(hps.s2_ckpt_dir, "eval"))
 
     dist.init_process_group(
-        backend="gloo" if os.name == "nt" else "nccl",
+        backend="gloo" if os.name == "nt" or torch.backends.mps.is_available() else "nccl",
         init_method="env://",
         world_size=n_gpus,
         rank=rank,
     )
     torch.manual_seed(hps.train.seed)
-    torch.cuda.set_device(rank)
+    if torch.cuda.is_available():
+        torch.cuda.set_device(rank)
 
     train_dataset = TextAudioSpeakerLoader(hps.data)  ########
@@ -128,9 +132,14 @@ def run(rank, n_gpus, hps):
         hps.train.segment_size // hps.data.hop_length,
         n_speakers=hps.data.n_speakers,
         **hps.model,
-    ).cuda(rank)
+    ).cuda(rank) if torch.cuda.is_available() else SynthesizerTrn(
+        hps.data.filter_length // 2 + 1,
+        hps.train.segment_size // hps.data.hop_length,
+        n_speakers=hps.data.n_speakers,
+        **hps.model,
+    ).to("mps")
 
-    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
+    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) if torch.cuda.is_available() else MultiPeriodDiscriminator(hps.model.use_spectral_norm).to("mps")
     for name, param in net_g.named_parameters():
         if not param.requires_grad:
             print(name, "not requires_grad")
@@ -174,8 +183,12 @@ def run(rank, n_gpus, hps):
         betas=hps.train.betas,
         eps=hps.train.eps,
     )
-    net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
-    net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+    if torch.cuda.is_available():
+        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
+        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+    else:
+        net_g = net_g.to("mps")
+        net_d = net_d.to("mps")
 
     try:  # resume automatically if a checkpoint can be loaded
         _, _, _, epoch_str = utils.load_checkpoint(
@@ -205,6 +218,9 @@ def run(rank, n_gpus, hps):
             net_g.module.load_state_dict(
                 torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
                 strict=False,
+            ) if torch.cuda.is_available() else net_g.load_state_dict(
+                torch.load(hps.train.pretrained_s2G, map_location="cpu")["weight"],
+                strict=False,
             )
         )  ## test: do not load the optimizer
     if hps.train.pretrained_s2D != "":
@@ -213,6 +229,8 @@ def run(rank, n_gpus, hps):
             print(
                 net_d.module.load_state_dict(
                     torch.load(hps.train.pretrained_s2D, map_location="cpu")["weight"]
+                ) if torch.cuda.is_available() else net_d.load_state_dict(
+                    torch.load(hps.train.pretrained_s2D, map_location="cpu")["weight"]
                 )
             )
 
@@ -288,6 +306,7 @@ def train_and_evaluate(
         text,
         text_lengths,
     ) in tqdm(enumerate(train_loader)):
-        spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(
-            rank, non_blocking=True
-        )
+        if torch.cuda.is_available():
+            spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(
+                rank, non_blocking=True
+            )
@@ -300,6 +319,13 @@ def train_and_evaluate(
-        text, text_lengths = text.cuda(rank, non_blocking=True), text_lengths.cuda(
-            rank, non_blocking=True
-        )
+            text, text_lengths = text.cuda(rank, non_blocking=True), text_lengths.cuda(
+                rank, non_blocking=True
+            )
+        else:
+            spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
+            y, y_lengths = y.to("mps"), y_lengths.to("mps")
+            ssl = ssl.to("mps")
+            ssl.requires_grad = False
+            # ssl_lengths = ssl_lengths.cuda(rank, non_blocking=True)
+            text, text_lengths = text.to("mps"), text_lengths.to("mps")
 
         with autocast(enabled=hps.train.fp16_run):
             (
@@ -500,13 +526,21 @@ def evaluate(hps, generator, eval_loader, writer_eval):
             text_lengths,
         ) in enumerate(eval_loader):
             print(111)
-            spec, spec_lengths = spec.cuda(), spec_lengths.cuda()
-            y, y_lengths = y.cuda(), y_lengths.cuda()
-            ssl = ssl.cuda()
-            text, text_lengths = text.cuda(), text_lengths.cuda()
+            if torch.cuda.is_available():
+                spec, spec_lengths = spec.cuda(), spec_lengths.cuda()
+                y, y_lengths = y.cuda(), y_lengths.cuda()
+                ssl = ssl.cuda()
+                text, text_lengths = text.cuda(), text_lengths.cuda()
+            else:
+                spec, spec_lengths = spec.to("mps"), spec_lengths.to("mps")
+                y, y_lengths = y.to("mps"), y_lengths.to("mps")
+                ssl = ssl.to("mps")
+                text, text_lengths = text.to("mps"), text_lengths.to("mps")
             for test in [0, 1]:
                 y_hat, mask, *_ = generator.module.infer(
                     ssl, spec, spec_lengths, text, text_lengths, test=test
+                ) if torch.cuda.is_available() else generator.infer(
+                    ssl, spec, spec_lengths, text, text_lengths, test=test
                 )
                 y_hat_lengths = mask.sum([1, 2]).long() * hps.data.hop_length
 