@ -1,5 +1,6 @@
import os , sys
import os , sys
import threading
from tqdm import tqdm
from tqdm import tqdm
now_dir = os . getcwd ( )
now_dir = os . getcwd ( )
@ -54,6 +55,7 @@ class TextPreprocessor:
self . bert_model = bert_model
self . bert_model = bert_model
self . tokenizer = tokenizer
self . tokenizer = tokenizer
self . device = device
self . device = device
self . bert_lock = threading . RLock ( )
def preprocess ( self , text : str , lang : str , text_split_method : str , version : str = " v2 " ) - > List [ Dict ] :
def preprocess ( self , text : str , lang : str , text_split_method : str , version : str = " v2 " ) - > List [ Dict ] :
print ( f ' ############ { i18n ( " 切分文本 " ) } ############ ' )
print ( f ' ############ { i18n ( " 切分文本 " ) } ############ ' )
@ -117,6 +119,7 @@ class TextPreprocessor:
return self . get_phones_and_bert ( text , language , version )
return self . get_phones_and_bert ( text , language , version )
def get_phones_and_bert ( self , text : str , language : str , version : str , final : bool = False ) :
def get_phones_and_bert ( self , text : str , language : str , version : str , final : bool = False ) :
with self . bert_lock :
if language in { " en " , " all_zh " , " all_ja " , " all_ko " , " all_yue " } :
if language in { " en " , " all_zh " , " all_ja " , " all_ko " , " all_yue " } :
# language = language.replace("all_","")
# language = language.replace("all_","")
formattext = text
formattext = text