Replace cantonese g2p with "ToJyutping" (#1697)

7 months ago · b7a904a671
parent a1fe2267af
commit b7a904a671
2 changed files with 20 additions and 8 deletions
--- a/GPT_SoVITS/text/cantonese.py
+++ b/GPT_SoVITS/text/cantonese.py
@@ -3,8 +3,8 @@
 import sys
 import re
 import cn2an
+import ToJyutping
-from pyjyutping import jyutping
 from text.symbols import punctuation
 from text.zh_normalization.text_normlization import TextNormalizer
@@ -173,12 +173,24 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
 def get_jyutping(text):
-    jp = jyutping.convert(text)
+    jyutping_array = []
-    # print(1111111,jp)
+    punct_pattern = re.compile(r"^[{}]+$".format(re.escape("".join(punctuation))))
-    for symbol in punctuation:
+
-        jp = jp.replace(symbol, " " + symbol + " ")
+    syllables = ToJyutping.get_jyutping_list(text)
-    jp_array = jp.split()
+
-    return jp_array
+    for word, syllable in syllables:
+        if punct_pattern.match(word):
+            puncts = re.split(r"([{}])".format(re.escape("".join(punctuation))), word)
+            for punct in puncts:
+                if len(punct) > 0:
+                    jyutping_array.append(punct)
+        else:
+            # match multiple jyutping eg: liu4 ge3, or single jyutping eg: liu4
+            if not re.search(r"^([a-z]+[1-6]+[ ]?)+$", syllable):
+                raise ValueError(f"Failed to convert {word} to jyutping: {syllable}")
+            jyutping_array.append(syllable)
+    return jyutping_array
 def get_bert_feature(text, word2ph):
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,7 +27,7 @@ LangSegment>=0.2.0
 Faster_Whisper
 wordsegment
 rotary_embedding_torch
-pyjyutping 
+ToJyutping 
 g2pk2
 ko_pron
 opencc; sys_platform != 'linux'