@@ -8,66 +8,7 @@ jieba.setLogLevel(logging.CRITICAL)
 # Change the fast_langdetect large-model location
 from pathlib import Path
 import fast_langdetect
-fast_langdetect.ft_detect.infer.CACHE_DIRECTORY = Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"
-
-# Prevent model-loading failures on Windows
-import os
-from typing import Optional
-def load_fasttext_model(
-    model_path: Path,
-    download_url: Optional[str] = None,
-    proxy: Optional[str] = None,
-):
-    """
-    Load a FastText model, downloading it if necessary.
-    :param model_path: Path to the FastText model file
-    :param download_url: URL to download the model from
-    :param proxy: Proxy URL for downloading the model
-    :return: FastText model
-    :raises DetectError: If model loading fails
-    """
-    if all([
-        fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL,
-        model_path.exists(),
-        model_path.name == fast_langdetect.ft_detect.infer.FASTTEXT_LARGE_MODEL_NAME,
-    ]):
-        if not fast_langdetect.ft_detect.infer.verify_md5(model_path, fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL):
-            fast_langdetect.ft_detect.infer.logger.warning(
-                f"fast-langdetect: MD5 hash verification failed for {model_path}, "
-                f"please check the integrity of the downloaded file from {fast_langdetect.ft_detect.infer.FASTTEXT_LARGE_MODEL_URL}. "
-                "\n This may seriously reduce the prediction accuracy. "
-                "If you want to ignore this, please set `fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL = None` "
-            )
-    if not model_path.exists():
-        if download_url:
-            fast_langdetect.ft_detect.infer.download_model(download_url, model_path, proxy)
-        if not model_path.exists():
-            raise fast_langdetect.ft_detect.infer.DetectError(f"FastText model file not found at {model_path}")
-
-    try:
-        # Load FastText model
-        if re.match(r'^[A-Za-z0-9_/\\:.]*$', str(model_path)):
-            model = fast_langdetect.ft_detect.infer.fasttext.load_model(str(model_path))
-        else:
-            python_dir = os.getcwd()
-            if str(model_path)[:len(python_dir)].upper() == python_dir.upper():
-                model = fast_langdetect.ft_detect.infer.fasttext.load_model(os.path.relpath(model_path, python_dir))
-            else:
-                import tempfile
-                import shutil
-                with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
-                    shutil.copyfile(model_path, tmpfile.name)
-
-                model = fast_langdetect.ft_detect.infer.fasttext.load_model(tmpfile.name)
-                os.unlink(tmpfile.name)
-        return model
-
-    except Exception as e:
-        fast_langdetect.ft_detect.infer.logger.warning(f"fast-langdetect: Failed to load FastText model from {model_path}: {e}")
-        raise fast_langdetect.ft_detect.infer.DetectError(f"Failed to load FastText model: {e}")
-
-if os.name == 'nt':
-    fast_langdetect.ft_detect.infer.load_fasttext_model = load_fasttext_model
+fast_langdetect.infer._default_detector = fast_langdetect.infer.LangDetector(fast_langdetect.infer.LangDetectConfig(cache_dir=Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"))
 
 
 from split_lang import LangSplitter
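
Note: the replacement line relies on the newer fast_langdetect API, where the cache
location is owned by a LangDetector built from a LangDetectConfig, so the old
CACHE_DIRECTORY constant and the Windows-specific loader patch are no longer needed.
A minimal usage sketch of the patched default detector follows; the detect() call and
the sample text are assumptions based on fast_langdetect's documented top-level API,
not part of this diff:

    # Sketch: redirect fast_langdetect's default detector to a local cache
    # directory, mirroring what the patched module does at import time.
    from pathlib import Path

    import fast_langdetect

    cache = Path(__file__).parent / "pretrained_models" / "fast_langdetect"
    fast_langdetect.infer._default_detector = fast_langdetect.infer.LangDetector(
        fast_langdetect.infer.LangDetectConfig(cache_dir=cache)
    )

    # Assumption: the top-level detect() routes through _default_detector,
    # so it now loads (and caches) the model under the directory above.
    print(fast_langdetect.detect("你好，世界"))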
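
For reference, the block removed above existed because fasttext's native loader can
fail on Windows paths containing non-ASCII characters; it fell back to copying the
model to an ASCII-safe temporary file before loading. The core trick in isolation,
as a standalone sketch (load_model_windows_safe is a hypothetical helper name, not
from the codebase):

    import os
    import re
    import shutil
    import tempfile

    import fasttext

    def load_model_windows_safe(model_path: str):
        # Paths made only of ASCII-safe characters load directly.
        if re.match(r'^[A-Za-z0-9_/\\:.]*$', model_path):
            return fasttext.load_model(model_path)
        # Otherwise copy to a temp file, whose generated name is ASCII-safe.
        with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
            shutil.copyfile(model_path, tmpfile.name)
        try:
            return fasttext.load_model(tmpfile.name)
        finally:
            os.unlink(tmpfile.name)  # clean up the temporary copy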