Fix bug in #1660 and #1667 (#1670)

* Fix #1660 and #1667 * Add Japanese cmudict-katakana dictionary
10 months ago · eee607b71d
parent a95b2b85f7
commit eee607b71d
2 changed files with 134230 additions and 27 deletions
--- a/GPT_SoVITS/text/ja_userdic/userdict.csv
+++ b/GPT_SoVITS/text/ja_userdic/userdict.csv
--- a/GPT_SoVITS/text/japanese.py
+++ b/GPT_SoVITS/text/japanese.py
@ -1,9 +1,9 @@
 # modified from https://github.com/CjangCjengh/vits/blob/main/text/japanese.py
 import re
-
-import pyopenjtalk
 import os
 import hashlib
+try:
+    import pyopenjtalk
    current_file_path = os.path.dirname(__file__)
    def get_hash(fp: str) -> str:
        hash_md5 = hashlib.md5()
@ -24,6 +24,11 @@ if os.path.exists(USERDIC_CSV_PATH):

    if os.path.exists(USERDIC_BIN_PATH):
        pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)   
+except Exception as e:
+    # print(e)
+    import pyopenjtalk
+    # Loading the user dictionary failed; ignore the error and continue without it.
+    pass


 from text.symbols import punctuation
@ -80,10 +85,6 @@ def post_replace_ph(ph):

    if ph in rep_map.keys():
        ph = rep_map[ph]
-    # if ph in symbols:
-    #     return ph
-    # if ph not in symbols:
-    #     ph = "UNK"
    return ph


@ -103,6 +104,8 @@ def symbols_to_japanese(text):
 def preprocess_jap(text, with_prosody=False):
    """Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html"""
    text = symbols_to_japanese(text)
+    # Convert English words to lower case; this should have no effect on Japanese text.
+    text = text.lower()
    sentences = re.split(_japanese_marks, text)
    marks = re.findall(_japanese_marks, text)
    text = []
@ -219,5 +222,5 @@ def g2p(norm_text, with_prosody=True):


 if __name__ == "__main__":
-    phones = g2p("こんにちは, hello, AKITOです,よろしくお願いしますね！")
+    phones = g2p("Hello.こんにちは！今日もNiCe天気ですね！tokyotowerに行きましょう！")
    print(phones)