Fix bugs in #1660 and #1667 (#1670)

* fix #1660 #1667

* add Japanese cmudict-katakana dictionary
main
AkitoLiu 10 months ago committed by GitHub
parent a95b2b85f7
commit eee607b71d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

File diff suppressed because it is too large Load Diff

@ -1,9 +1,9 @@
# modified from https://github.com/CjangCjengh/vits/blob/main/text/japanese.py
import re
import pyopenjtalk
import os
import hashlib
try:
import pyopenjtalk
current_file_path = os.path.dirname(__file__)
def get_hash(fp: str) -> str:
hash_md5 = hashlib.md5()
@ -24,6 +24,11 @@ if os.path.exists(USERDIC_CSV_PATH):
if os.path.exists(USERDIC_BIN_PATH):
pyopenjtalk.update_global_jtalk_with_user_dict(USERDIC_BIN_PATH)
except Exception as e:
# print(e)
import pyopenjtalk
# failed to load user dictionary, ignore.
pass
from text.symbols import punctuation
@ -80,10 +85,6 @@ def post_replace_ph(ph):
if ph in rep_map.keys():
ph = rep_map[ph]
# if ph in symbols:
# return ph
# if ph not in symbols:
# ph = "UNK"
return ph
@ -103,6 +104,8 @@ def symbols_to_japanese(text):
def preprocess_jap(text, with_prosody=False):
"""Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html"""
text = symbols_to_japanese(text)
# Lowercase English words; this should not affect Japanese text.
text = text.lower()
sentences = re.split(_japanese_marks, text)
marks = re.findall(_japanese_marks, text)
text = []
@ -219,5 +222,5 @@ def g2p(norm_text, with_prosody=True):
if __name__ == "__main__":
phones = g2p("こんにちは, hello, AKITOです,よろしくお願いしますね")
phones = g2p("Hello.こんにちは今日もNiCe天気ですねtokyotowerに行きましょう")
print(phones)

Loading…
Cancel
Save