From 7ecdb5f3bcb2c4bb158c340c7c9f3d27701def93 Mon Sep 17 00:00:00 2001 From: lipku Date: Sun, 22 Jun 2025 19:56:02 +0800 Subject: [PATCH] Improve the API interface --- README.md | 9 ++-- app.py | 138 ++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 104 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 7d80564..de14ebe 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ [English](./README-EN.md) | 中文版 实时交互流式数字人,实现音视频同步对话。基本可以达到商用效果 -[wav2lip效果](https://www.bilibili.com/video/BV1scwBeyELA/) | [ernerf效果](https://www.bilibili.com/video/BV1G1421z73r/) | [musetalk效果](https://www.bilibili.com/video/BV1gm421N7vQ/) +[wav2lip效果](https://www.bilibili.com/video/BV1scwBeyELA/) | [ernerf效果](https://www.bilibili.com/video/BV1G1421z73r/) | [musetalk效果](https://www.bilibili.com/video/BV1gm421N7vQ/) +国内镜像地址: ## 为避免与3d数字人混淆,原项目metahuman-stream改名为livetalking,原有链接地址继续可用 @@ -54,9 +55,10 @@ GoogleDriver 服务端需要开放端口 tcp:8010; udp:1-65536 -如果需要商用高清wav2lip模型,[链接](https://livetalking-doc.readthedocs.io/zh-cn/latest/service.html#wav2lip) +客户端可以选用以下两种方式: +(1)用浏览器打开http://serverip:8010/webrtcapi.html , 先点‘start',播放数字人视频;然后在文本框输入任意文字,提交。数字人播报该段文字 +(2)用客户端方式, 下载地址 - 快速体验 用该镜像创建实例即可运行成功 @@ -108,6 +110,7 @@ wav2lip256显卡3060以上即可,musetalk需要3080Ti以上。 5. 动作编排:不说话时动作、唤醒时动作、思考时动作、进入休眠动作 6. 支持不限时长的数字人形象avatar 7. 提供实时音频流输入接口 +8. 数字人透明背景,能叠加动态背景 ## 7. 声明 基于本项目开发并发布在B站、视频号、抖音等网站上的视频需带上LiveTalking水印和标识,如需去除请联系作者备案授权。 diff --git a/app.py b/app.py index 855dd4d..4ef5a43 100644 --- a/app.py +++ b/app.py @@ -45,6 +45,7 @@ import asyncio import torch from typing import Dict from logger import logger +import gc app = Flask(__name__) @@ -87,7 +88,12 @@ async def offer(request): if len(nerfreals) >= opt.max_session: logger.info('reach max session') - return -1 + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": -1, "msg": "reach max session"} + ), + ) sessionid = randN(6) #len(nerfreals) logger.info('sessionid=%d',sessionid) nerfreals[sessionid] = None @@ -109,6 +115,7 @@ async def offer(request): if pc.connectionState == "closed": pcs.discard(pc) del nerfreals[sessionid] + gc.collect() player = HumanPlayer(nerfreals[sessionid]) audio_sender = pc.addTrack(player.audio) @@ -135,24 +142,55 @@ async def offer(request): ) async def human(request): - params = await request.json() + try: + params = await request.json() - sessionid = params.get('sessionid',0) - if params.get('interrupt'): - nerfreals[sessionid].flush_talk() + sessionid = params.get('sessionid',0) + if params.get('interrupt'): + nerfreals[sessionid].flush_talk() - if params['type']=='echo': - nerfreals[sessionid].put_msg_txt(params['text']) - elif params['type']=='chat': - res=await asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid]) - #nerfreals[sessionid].put_msg_txt(res) + if params['type']=='echo': + nerfreals[sessionid].put_msg_txt(params['text']) + elif params['type']=='chat': + asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid]) + #nerfreals[sessionid].put_msg_txt(res) - return web.Response( - content_type="application/json", - text=json.dumps( - {"code": 0, "data":"ok"} - ), - ) + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": 0, "msg":"ok"} + ), + ) + except Exception as e: + logger.exception('exception:') + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": -1, "msg": str(e)} + ), + ) + +async def interrupt_talk(request): + try: + params = await request.json() + + sessionid = params.get('sessionid',0) + nerfreals[sessionid].flush_talk() + + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": 0, "msg":"ok"} + ), + ) + except Exception as e: + logger.exception('exception:') + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": -1, "msg": str(e)} + ), + ) async def humanaudio(request): try: @@ -170,41 +208,60 @@ async def humanaudio(request): ), ) except Exception as e: + logger.exception('exception:') return web.Response( content_type="application/json", text=json.dumps( - {"code": -1, "msg":"err","data": ""+e.args[0]+""} + {"code": -1, "msg": str(e)} ), ) async def set_audiotype(request): - params = await request.json() + try: + params = await request.json() - sessionid = params.get('sessionid',0) - nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit']) + sessionid = params.get('sessionid',0) + nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit']) - return web.Response( - content_type="application/json", - text=json.dumps( - {"code": 0, "data":"ok"} - ), - ) + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": 0, "msg":"ok"} + ), + ) + except Exception as e: + logger.exception('exception:') + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": -1, "msg": str(e)} + ), + ) async def record(request): - params = await request.json() - - sessionid = params.get('sessionid',0) - if params['type']=='start_record': - # nerfreals[sessionid].put_msg_txt(params['text']) - nerfreals[sessionid].start_recording() - elif params['type']=='end_record': - nerfreals[sessionid].stop_recording() - return web.Response( - content_type="application/json", - text=json.dumps( - {"code": 0, "data":"ok"} - ), - ) + try: + params = await request.json() + + sessionid = params.get('sessionid',0) + if params['type']=='start_record': + # nerfreals[sessionid].put_msg_txt(params['text']) + nerfreals[sessionid].start_recording() + elif params['type']=='end_record': + nerfreals[sessionid].stop_recording() + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": 0, "msg":"ok"} + ), + ) + except Exception as e: + logger.exception('exception:') + return web.Response( + content_type="application/json", + text=json.dumps( + {"code": -1, "msg": str(e)} + ), + ) async def is_speaking(request): params = await request.json() @@ -342,6 +399,7 @@ if __name__ == '__main__': appasync.router.add_post("/humanaudio", humanaudio) appasync.router.add_post("/set_audiotype", set_audiotype) appasync.router.add_post("/record", record) + appasync.router.add_post("/interrupt_talk", interrupt_talk) appasync.router.add_post("/is_speaking", is_speaking) appasync.router.add_static('/',path='web')