# Provenance (recovered from the git patch envelope this file was extracted from):
#   commit  dba1a74ccb0cf19a1b4eb93faf11d4ec2b1fc5d7
#   author  RVC-Boss <129054828+RVC-Boss@users.noreply.github.com>
#   date    Sat, 3 Feb 2024 00:01:26 +0800
#   path    tools/webui.py (new file)
#   subject "Fix the error raised when uvr5 fails to recognise the audio format.
#            Unless you can list bad cases that are not recognised, do not
#            modify this file."
"""Gradio web UI for UVR5 vocal/accompaniment separation, de-reverb and de-echo.

Invoked as a script with four positional arguments:
``<device> <is_half> <port> <is_share>``.
"""
import logging
import os
import subprocess
import sys
import tempfile
import traceback

import gradio as gr
import librosa
import ffmpeg
import soundfile as sf
import torch

from tools.i18n.i18n import I18nAuto
from mdxnet import MDXNetDereverb
from vr import AudioPre, AudioPreDeEcho

i18n = I18nAuto()

logger = logging.getLogger(__name__)

weight_uvr5_root = "tools/uvr5/uvr5_weights"
# Every ".pth" weight (extension removed) and every entry whose name contains
# "onnx" is offered as a selectable model.
uvr5_names = [
    name.replace(".pth", "")
    for name in os.listdir(weight_uvr5_root)
    if name.endswith(".pth") or "onnx" in name
]


def _parse_flag(raw):
    """Interpret a command-line flag such as "True"/"False"/"1"/"0" as a bool.

    Replaces ``eval`` on argv (arbitrary code execution) and the previous
    pass-through of the raw string, for which "False" was truthy.

    Raises:
        ValueError: if *raw* is not a recognised boolean spelling.
    """
    text = str(raw).strip().lower()
    if text in ("true", "1", "yes", "y", "on"):
        return True
    if text in ("false", "0", "no", "n", "off", "none", ""):
        return False
    raise ValueError("expected a boolean flag, got %r" % (raw,))


device = sys.argv[1]
# NOTE(review): is_half is forwarded to AudioPre/AudioPreDeEcho; presumably a
# half-precision switch, so it must be a real bool rather than the argv string.
is_half = _parse_flag(sys.argv[2])
webui_port_uvr5 = int(sys.argv[3])
is_share = _parse_flag(sys.argv[4])


def _clean_path(raw):
    """Strip the spaces, double quotes and newlines users paste around a path."""
    return raw.strip(" ").strip('"').strip("\n").strip('"').strip(" ")


def _is_stereo_44k(inp_path):
    """Return True when ffprobe reports the first stream as 2-channel, 44100 Hz."""
    first_stream = ffmpeg.probe(inp_path, cmd="ffprobe")["streams"][0]
    return first_stream["channels"] == 2 and first_stream["sample_rate"] == "44100"


def _reformat_to_wav(inp_path):
    """Convert *inp_path* to a 44.1 kHz stereo PCM wav in the temp dir; return its path.

    The conversion is best effort: a failing or missing ffmpeg is logged and the
    (possibly non-existent) target path is still returned, so the caller reports
    the failure for this file instead of aborting the whole batch.
    """
    # Honour TEMP when set (previous behaviour) but do not crash when it is absent.
    tmp_dir = os.environ.get("TEMP") or tempfile.gettempdir()
    tmp_path = "%s/%s.reformatted.wav" % (tmp_dir, os.path.basename(inp_path))
    # Argument list instead of a shell string: paths containing spaces or shell
    # metacharacters are passed through verbatim.
    command = [
        "ffmpeg", "-i", inp_path,
        "-vn", "-acodec", "pcm_s16le", "-ac", "2", "-ar", "44100",
        tmp_path, "-y",
    ]
    try:
        subprocess.run(command, check=False)
    except OSError:
        traceback.print_exc()
    return tmp_path


def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0):
    """Run the selected UVR5 model over a batch of audio files.

    This is a generator: after each processed file it yields the accumulated,
    newline-joined status log, and it yields the log once more after cleanup.

    Args:
        model_name: entry of ``uvr5_names``; "onnx_dereverb_By_FoxJoy" selects
            ``MDXNetDereverb``, names containing "DeEcho" select
            ``AudioPreDeEcho``, anything else ``AudioPre``.
        inp_root: input folder; when non-empty every entry in it is processed
            and ``paths`` is ignored.
        save_root_vocal: output folder passed to the model for vocals.
        paths: uploaded file objects exposing ``.name``; used only when
            ``inp_root`` is empty.
        save_root_ins: output folder passed to the model for the other stem.
        agg: aggressiveness, converted with ``int``.
        format0: export format forwarded to the model.

    Yields:
        str: the status log so far ("<file>->Success" or "<file>-><traceback>").
    """
    infos = []
    pre_fun = None
    try:
        inp_root = _clean_path(inp_root)
        save_root_vocal = _clean_path(save_root_vocal)
        save_root_ins = _clean_path(save_root_ins)
        if model_name == "onnx_dereverb_By_FoxJoy":
            # Uses the module-level import; the former local
            # `from MDXNet import ...` disagreed in case with it.
            pre_fun = MDXNetDereverb(15)
        else:
            func = AudioPre if "DeEcho" not in model_name else AudioPreDeEcho
            pre_fun = func(
                agg=int(agg),
                model_path=os.path.join(weight_uvr5_root, model_name + ".pth"),
                device=device,
                is_half=is_half,
            )
        if inp_root != "":
            paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)]
        else:
            paths = [path.name for path in paths]
        for inp_path in paths:
            # `paths` already holds complete paths; joining with inp_root again
            # doubled the directory for relative input folders.
            if not os.path.isfile(inp_path):
                continue
            done = False
            try:
                # Fast path: feed the file directly when it is already
                # 2-channel / 44100 Hz.
                if _is_stereo_44k(inp_path):
                    pre_fun._path_audio_(
                        inp_path, save_root_ins, save_root_vocal, format0
                    )
                    done = True
            except Exception:
                traceback.print_exc()
            if not done:
                # Probe failed, format differs, or the direct attempt raised:
                # retry on a normalised wav copy.
                inp_path = _reformat_to_wav(inp_path)
            try:
                if not done:
                    pre_fun._path_audio_(
                        inp_path, save_root_ins, save_root_vocal, format0
                    )
            except Exception:
                infos.append(
                    "%s->%s" % (os.path.basename(inp_path), traceback.format_exc())
                )
            else:
                infos.append("%s->Success" % (os.path.basename(inp_path)))
            yield "\n".join(infos)
    except Exception:
        infos.append(traceback.format_exc())
        yield "\n".join(infos)
    finally:
        # Drop model references so GPU memory can actually be released.
        try:
            if pre_fun is not None:
                if model_name == "onnx_dereverb_By_FoxJoy":
                    del pre_fun.pred.model
                    del pre_fun.pred.model_
                else:
                    del pre_fun.model
                pre_fun = None
        except Exception:
            traceback.print_exc()
        print("clean_empty_cache")
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    # Final status; kept outside `finally` so closing the generator early does
    # not trigger "generator ignored GeneratorExit".
    yield "\n".join(infos)


# NOTE(review): the line breaks inside the two long UI strings below are
# reproduced from the source text as "\n". Confirm they match the i18n locale
# keys before relying on translations.
with gr.Blocks(title="UVR5 WebUI") as app:
    gr.Markdown(
        value=i18n(
            "本软件以MIT协议开源, 作者不对软件具备任何控制力, \n"
            "使用软件者、传播软件导出的声音者自负全责.\n"
            "如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE."
        )
    )
    with gr.Tabs():
        with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
            with gr.Group():
                gr.Markdown(
                    value=i18n(
                        "人声伴奏分离批量处理, 使用UVR5模型。\n"
                        "合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。\n"
                        "模型分为三类:\n"
                        "1、保留人声:不带和声的音频选这个,对主人声保留比HP5更好。内置HP2和HP3两个模型,HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点;\n"
                        "2、仅保留主人声:带和声的音频选这个,对主人声可能有削弱。内置HP5一个模型;\n"
                        "3、去混响、去延迟模型(by FoxJoy):\n"
                        "  (1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;\n"
                        " (234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底,DeReverb额外去除混响,可去除单声道混响,但是对高频重的板式混响去不干净。\n"
                        "去混响/去延迟,附:\n"
                        "1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍;\n"
                        "2、MDX-Net-Dereverb模型挺慢的;\n"
                        "3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。"
                    )
                )
                with gr.Row():
                    with gr.Column():
                        dir_wav_input = gr.Textbox(
                            label=i18n("输入待处理音频文件夹路径"),
                            placeholder="C:\\Users\\Desktop\\todo-songs",
                        )
                        wav_inputs = gr.File(
                            file_count="multiple",
                            label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
                        )
                    with gr.Column():
                        model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
                        agg = gr.Slider(
                            minimum=0,
                            maximum=20,
                            step=1,
                            label=i18n("人声提取激进程度"),
                            value=10,
                            interactive=True,
                            visible=False,  # not exposed for adjustment yet
                        )
                        opt_vocal_root = gr.Textbox(
                            label=i18n("指定输出主人声文件夹"), value="output/uvr5_opt"
                        )
                        opt_ins_root = gr.Textbox(
                            label=i18n("指定输出非主人声文件夹"), value="output/uvr5_opt"
                        )
                        format0 = gr.Radio(
                            label=i18n("导出文件格式"),
                            choices=["wav", "flac", "mp3", "m4a"],
                            value="flac",
                            interactive=True,
                        )
                    but2 = gr.Button(i18n("转换"), variant="primary")
                    vc_output4 = gr.Textbox(label=i18n("输出信息"))
                    but2.click(
                        uvr,
                        [
                            model_choose,
                            dir_wav_input,
                            opt_vocal_root,
                            wav_inputs,
                            opt_ins_root,
                            agg,
                            format0,
                        ],
                        [vc_output4],
                        api_name="uvr_convert",
                    )
app.queue(concurrency_count=511, max_size=1022).launch(
    server_name="0.0.0.0",
    inbrowser=True,
    share=is_share,
    server_port=webui_port_uvr5,
    quiet=True,
)