support mac m1-m4 (#376)

Co-authored-by: yuheng <lipku@163.com>
4 months ago · 2cdf382897
parent 7b340cc9a2
commit 2cdf382897
17 changed files with 33 additions and 26 deletions
--- a/ernerf/data_utils/face_tracking/face_tracker.py
+++ b/ernerf/data_utils/face_tracking/face_tracker.py
@ -179,8 +179,11 @@ print(f'[INFO] fitting light...')
 batch_size = 32
-device_default = torch.device("cuda:0")
+device_default = torch.device("cuda:0" if torch.cuda.is_available() else (
-device_render = torch.device("cuda:0")
+    "mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
+device_render = torch.device("cuda:0" if torch.cuda.is_available() else (
+    "mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
 renderer = Render_3DMM(arg_focal, h, w, batch_size, device_render)
 sel_ids = np.arange(0, num_frames, int(num_frames / batch_size))[:batch_size]
--- a/ernerf/data_utils/face_tracking/render_3dmm.py
+++ b/ernerf/data_utils/face_tracking/render_3dmm.py
@ -83,7 +83,7 @@ class Render_3DMM(nn.Module):
        img_h=500,
        img_w=500,
        batch_size=1,
-        device=torch.device("cuda:0"),
+        device=torch.device("cuda:0" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")),
    ):
        super(Render_3DMM, self).__init__()
--- a/ernerf/main.py
+++ b/ernerf/main.py
@ -147,7 +147,7 @@ if __name__ == '__main__':
    seed_everything(opt.seed)
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
    model = NeRFNetwork(opt)
--- a/ernerf/nerf_triplane/utils.py
+++ b/ernerf/nerf_triplane/utils.py
@ -442,7 +442,7 @@ class LPIPSMeter:
        self.N = 0
        self.net = net
-        self.device = device if device is not None else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.device = device if device is not None else torch.device('cuda' if torch.cuda.is_available() else ('mps' if hasattr(torch.backends, "mps") and torch.backends.mps.is_available() else 'cpu'))
        self.fn = lpips.LPIPS(net=net).eval().to(self.device)
    def clear(self):
@ -618,7 +618,11 @@ class Trainer(object):
        self.flip_init_lips = self.opt.init_lips
        self.time_stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
        self.scheduler_update_every_step = scheduler_update_every_step
-        self.device = device if device is not None else torch.device(f'cuda:{local_rank}' if torch.cuda.is_available() else 'cpu')
+        self.device = device if device is not None else torch.device(
+            f'cuda:{local_rank}' if torch.cuda.is_available() else (
+                'mps' if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else 'cpu'
+            )
+        )
        self.console = Console()
        model.to(self.device)
--- a/lightreal.py
+++ b/lightreal.py
@ -56,10 +56,7 @@ from ultralight.unet import Model
 from ultralight.audio2feature import Audio2Feature
 from logger import logger
-
+device = "cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
 logger.info('Using {} for inference.'.format(device))
 def load_model(opt):
    audio_processor = Audio2Feature()
--- a/lipreal.py
+++ b/lipreal.py
@ -44,8 +44,8 @@ from basereal import BaseReal
 from tqdm import tqdm
 from logger import logger
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
+device = "cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")
-logger.info('Using {} for inference.'.format(device))
+print('Using {} for inference.'.format(device))
 def _load(checkpoint_path):
 	if device == 'cuda':
--- a/musereal.py
+++ b/musereal.py
@ -51,7 +51,7 @@ from logger import logger
 def load_model():
    # load model weights
    audio_processor,vae, unet, pe = load_all_model()
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
    timesteps = torch.tensor([0], device=device)
    pe = pe.half()
    vae.vae = vae.vae.half()
@ -77,7 +77,8 @@ def load_avatar(avatar_id):
    #     "bbox_shift":self.bbox_shift   
    # }
-    input_latent_list_cycle = torch.load(latents_out_path)  #,weights_only=True
+    # Map the cached latents onto whichever backend is present (cuda -> mps -> cpu); a hard-coded 'mps' target is only valid on Apple Silicon.
+    input_latent_list_cycle = torch.load(latents_out_path, map_location=torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")))  #,weights_only=True
    with open(coords_path, 'rb') as f:
        coord_list_cycle = pickle.load(f)
    input_img_list = glob.glob(os.path.join(full_imgs_path, '*.[jpJP][pnPN]*[gG]'))
--- a/musetalk/models/unet.py
+++ b/musetalk/models/unet.py
@ -36,7 +36,7 @@ class UNet():
            unet_config = json.load(f)
        self.model = UNet2DConditionModel(**unet_config)
        self.pe = PositionalEncoding(d_model=384)
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
        weights = torch.load(model_path) if torch.cuda.is_available() else torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(weights)
        if use_float16:
--- a/musetalk/models/vae.py
+++ b/musetalk/models/vae.py
@ -23,7 +23,7 @@ class VAE():
        self.model_path = model_path
        self.vae = AutoencoderKL.from_pretrained(self.model_path)
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
        self.vae.to(self.device)
        if use_float16:
--- a/musetalk/simple_musetalk.py
+++ b/musetalk/simple_musetalk.py
@ -325,7 +325,7 @@ def create_musetalk_human(file, avatar_id):
 # initialize the mmpose model
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = "cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")
 fa = FaceAlignment(1, flip_input=False, device=device)
 config_file = os.path.join(current_dir, 'utils/dwpose/rtmpose-l_8xb32-270e_coco-ubody-wholebody-384x288.py')
 checkpoint_file = os.path.abspath(os.path.join(current_dir, '../models/dwpose/dw-ll_ucoco_384.pth'))
--- a/musetalk/utils/face_parsing/resnet.py
+++ b/musetalk/utils/face_parsing/resnet.py
@ -80,7 +80,7 @@ class Resnet18(nn.Module):
        return feat8, feat16, feat32
    def init_weight(self, model_path):
-        state_dict = torch.load(model_path) #modelzoo.load_url(resnet18_url)
+        state_dict = torch.load(model_path, weights_only=False) #modelzoo.load_url(resnet18_url)
        self_state_dict = self.state_dict()
        for k, v in state_dict.items():
            if 'fc' in k: continue
--- a/musetalk/utils/preprocessing.py
+++ b/musetalk/utils/preprocessing.py
@ -13,14 +13,14 @@ import torch
 from tqdm import tqdm
 # initialize the mmpose model
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
 config_file = './musetalk/utils/dwpose/rtmpose-l_8xb32-270e_coco-ubody-wholebody-384x288.py'
 checkpoint_file = './models/dwpose/dw-ll_ucoco_384.pth'
 model = init_model(config_file, checkpoint_file, device=device)
 # initialize the face detection model
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = "cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")
-fa = FaceAlignment(LandmarksType._2D, flip_input=False,device=device)
+fa = FaceAlignment(LandmarksType._2D, flip_input=False, device=device)
 # maker if the bbox is not sufficient 
 coord_placeholder = (0.0,0.0,0.0,0.0)
--- a/musetalk/whisper/whisper/init.py
+++ b/musetalk/whisper/whisper/init.py
@ -91,7 +91,7 @@ def load_model(name: str, device: Optional[Union[str, torch.device]] = None, dow
    """
    if device is None:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")
    if download_root is None:
        download_root = os.getenv(
            "XDG_CACHE_HOME", 
--- a/musetalk/whisper/whisper/transcribe.py
+++ b/musetalk/whisper/whisper/transcribe.py
@ -78,6 +78,8 @@ def transcribe(
        if dtype == torch.float16:
            warnings.warn("FP16 is not supported on CPU; using FP32 instead")
            dtype = torch.float32
+        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            warnings.warn("Performing inference on CPU when MPS is available")
    if dtype == torch.float32:
        decode_options["fp16"] = False
@ -135,7 +137,7 @@ def cli():
    parser.add_argument("audio", nargs="+", type=str, help="audio file(s) to transcribe")
    parser.add_argument("--model", default="small", choices=available_models(), help="name of the Whisper model to use")
    parser.add_argument("--model_dir", type=str, default=None, help="the path to save model files; uses ~/.cache/whisper by default")
-    parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference")
+    parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"), help="device to use for PyTorch inference")
    parser.add_argument("--output_dir", "-o", type=str, default=".", help="directory to save the outputs")
    parser.add_argument("--verbose", type=str2bool, default=True, help="whether to print out the progress and debug messages")
--- a/nerfasr.py
+++ b/nerfasr.py
@ -30,7 +30,7 @@ class NerfASR(BaseASR):
    def __init__(self, opt, parent, audio_processor,audio_model):
        super().__init__(opt,parent)
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.device = "cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu")
        if 'esperanto' in self.opt.asr_model:
            self.audio_dim = 44
        elif 'deepspeech' in self.opt.asr_model:
--- a/nerfreal.py
+++ b/nerfreal.py
@ -77,7 +77,7 @@ def load_model(opt):
    seed_everything(opt.seed)
    logger.info(opt)
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    device = torch.device('cuda' if torch.cuda.is_available() else ('mps' if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else 'cpu'))
    model = NeRFNetwork(opt)
    criterion = torch.nn.MSELoss(reduction='none')
--- a/ultralight/unet.py
+++ b/ultralight/unet.py
@ -236,7 +236,7 @@ if __name__ == '__main__':
            if hasattr(module, 'reparameterize'):
                module.reparameterize()
        return model
-    device = torch.device("cuda")
+    device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if (hasattr(torch.backends, "mps") and torch.backends.mps.is_available()) else "cpu"))
    def check_onnx(torch_out, torch_in, audio):
        onnx_model = onnx.load(onnx_path)
        onnx.checker.check_model(onnx_model)