Improve the API interface

Branch: main
lipku committed 1 month ago
parent 3ff0274403
commit 7ecdb5f3bc

README.md

@@ -1,6 +1,7 @@
 [English](./README-EN.md) | Chinese version
 A real-time interactive streaming digital human with synchronized audio-video conversation. Quality is close to commercial grade.
 [wav2lip demo](https://www.bilibili.com/video/BV1scwBeyELA/) | [ernerf demo](https://www.bilibili.com/video/BV1G1421z73r/) | [musetalk demo](https://www.bilibili.com/video/BV1gm421N7vQ/)
+Mirror for users in China: <https://gitee.com/lipku/LiveTalking>

 ## To avoid confusion with 3D digital humans, the original project metahuman-stream has been renamed to livetalking; the original links still work
@@ -54,9 +55,10 @@ GoogleDriver <https://drive.google.com/drive/folders/1FOC_MD6wdogyyX_7V1d4NDIO7P
 After extracting wav2lip256_avatar1.tar.gz, copy the whole folder into data/avatars in this project
 - Run
   python app.py --transport webrtc --model wav2lip --avatar_id wav2lip256_avatar1
-Open http://serverip:8010/webrtcapi.html in a browser and click 'start' to play the digital-human video; then type any text into the text box and submit it, and the digital human reads the text aloud
 <font color=red>The server must open ports tcp:8010 and udp:1-65536</font>
-For the commercial high-definition wav2lip model, see this [link](https://livetalking-doc.readthedocs.io/zh-cn/latest/service.html#wav2lip)
+There are two client options:
+(1) Open http://serverip:8010/webrtcapi.html in a browser and click 'start' to play the digital-human video; then type any text into the text box and submit it, and the digital human reads the text aloud
+(2) Use the desktop client, download: <https://pan.quark.cn/s/d7192d8ac19b>
 - Quick trial
 <https://www.compshare.cn/images/4458094e-a43d-45fe-9b57-de79253befe4?referral_code=3XW3852OBmnD089hMMrtuU&ytag=GPU_GitHub_livetalking> Create an instance from this image and it runs out of the box
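Beyond the browser page, the same server can be driven directly over its HTTP API. A minimal sketch, assuming the server address from the step above and a `sessionid` taken from the `/offer` response (`123456` here is a hypothetical value):

```python
# Drive the /human endpoint from Python; `requests` is a third-party HTTP client.
import requests

resp = requests.post(
    "http://serverip:8010/human",
    json={
        "sessionid": 123456,  # hypothetical id returned by /offer
        "type": "echo",       # 'echo' speaks the text verbatim; 'chat' routes it through the LLM
        "text": "Hello, I am a streaming digital human.",
        "interrupt": True,    # flush whatever the avatar is currently saying
    },
)
print(resp.json())  # {"code": 0, "msg": "ok"} on success
```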
@@ -108,6 +110,7 @@ wav2lip256 runs on an RTX 3060 or better; musetalk needs a 3080Ti or better.
 5. Action choreography: idle actions, wake-up actions, thinking actions, entering-sleep actions
 6. Avatars of unlimited duration
 7. A real-time audio-stream input interface
+8. Transparent avatar background that can be composited over a dynamic background

 ## 7. Disclaimer
 Videos built on this project and published to Bilibili, WeChat Channels, Douyin, and similar platforms must carry the LiveTalking watermark and branding; to remove them, contact the author for registration and authorization.

app.py (138 changed lines)

@@ -45,6 +45,7 @@ import asyncio
 import torch
 from typing import Dict
 from logger import logger
+import gc

 app = Flask(__name__)
@@ -87,7 +88,12 @@ async def offer(request):
     if len(nerfreals) >= opt.max_session:
         logger.info('reach max session')
-        return -1
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": "reach max session"}
+            ),
+        )
     sessionid = randN(6) #len(nerfreals)
     logger.info('sessionid=%d',sessionid)
     nerfreals[sessionid] = None
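`offer()` previously returned a bare `-1`, which aiohttp cannot turn into an HTTP response; clients now receive a well-formed JSON error instead. A hedged client-side sketch of handling it (the `/offer` path and the `sessionid` field in the success response are assumptions taken from the project's web client):

```python
# Detect the structured "reach max session" error when requesting a WebRTC session.
import requests

local_offer_sdp = "v=0..."  # placeholder; a real client takes this from RTCPeerConnection.createOffer()

resp = requests.post(
    "http://serverip:8010/offer",
    json={"sdp": local_offer_sdp, "type": "offer"},
).json()

if resp.get("code") == -1:
    print("server busy:", resp["msg"])  # at --max_session capacity; back off and retry later
else:
    print("session established:", resp.get("sessionid"))
```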
@@ -109,6 +115,7 @@ async def offer(request):
         if pc.connectionState == "closed":
             pcs.discard(pc)
             del nerfreals[sessionid]
+            gc.collect()

     player = HumanPlayer(nerfreals[sessionid])
     audio_sender = pc.addTrack(player.audio)
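Each closed connection deletes a `nerfreals` entry holding model state and frame queues; the explicit `gc.collect()` breaks any lingering reference cycles immediately instead of at some later collection. A hypothetical fuller teardown, not part of this commit, could also release cached GPU memory (torch is already imported in app.py):

```python
# Sketch of a more aggressive session teardown (hypothetical extension).
import gc
import torch

def release_session(nerfreals: dict, sessionid: int) -> None:
    del nerfreals[sessionid]      # drop the session's model/state object
    gc.collect()                  # free reference cycles right away
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # also return cached CUDA blocks to the driver
```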
@@ -135,24 +142,55 @@ async def offer(request):
     )

 async def human(request):
-    params = await request.json()
-
-    sessionid = params.get('sessionid',0)
-    if params.get('interrupt'):
-        nerfreals[sessionid].flush_talk()
-
-    if params['type']=='echo':
-        nerfreals[sessionid].put_msg_txt(params['text'])
-    elif params['type']=='chat':
-        res=await asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])
-        #nerfreals[sessionid].put_msg_txt(res)
-
-    return web.Response(
-        content_type="application/json",
-        text=json.dumps(
-            {"code": 0, "data":"ok"}
-        ),
-    )
+    try:
+        params = await request.json()
+
+        sessionid = params.get('sessionid',0)
+        if params.get('interrupt'):
+            nerfreals[sessionid].flush_talk()
+
+        if params['type']=='echo':
+            nerfreals[sessionid].put_msg_txt(params['text'])
+        elif params['type']=='chat':
+            asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])
+            #nerfreals[sessionid].put_msg_txt(res)
+
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )
+
+async def interrupt_talk(request):
+    try:
+        params = await request.json()
+        sessionid = params.get('sessionid',0)
+        nerfreals[sessionid].flush_talk()
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )

 async def humanaudio(request):
     try:
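The new `/interrupt_talk` route exposes `flush_talk()` on its own, so a client can cut the avatar off without also submitting new text (previously an interrupt could only ride along on a `/human` request). A minimal usage sketch:

```python
# Stop the avatar mid-sentence; 123456 is a hypothetical session id from /offer.
import requests

resp = requests.post(
    "http://serverip:8010/interrupt_talk",
    json={"sessionid": 123456},
)
print(resp.json())  # {"code": 0, "msg": "ok"}, or {"code": -1, "msg": ...} on failure
```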
@@ -170,41 +208,60 @@ async def humanaudio(request):
             ),
         )
     except Exception as e:
+        logger.exception('exception:')
         return web.Response(
             content_type="application/json",
             text=json.dumps(
-                {"code": -1, "msg":"err","data": ""+e.args[0]+""}
+                {"code": -1, "msg": str(e)}
             ),
         )

 async def set_audiotype(request):
-    params = await request.json()
-
-    sessionid = params.get('sessionid',0)
-    nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit'])
-
-    return web.Response(
-        content_type="application/json",
-        text=json.dumps(
-            {"code": 0, "data":"ok"}
-        ),
-    )
+    try:
+        params = await request.json()
+        sessionid = params.get('sessionid',0)
+        nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit'])
+
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )

 async def record(request):
-    params = await request.json()
-
-    sessionid = params.get('sessionid',0)
-    if params['type']=='start_record':
-        # nerfreals[sessionid].put_msg_txt(params['text'])
-        nerfreals[sessionid].start_recording()
-    elif params['type']=='end_record':
-        nerfreals[sessionid].stop_recording()
-    return web.Response(
-        content_type="application/json",
-        text=json.dumps(
-            {"code": 0, "data":"ok"}
-        ),
-    )
+    try:
+        params = await request.json()
+        sessionid = params.get('sessionid',0)
+        if params['type']=='start_record':
+            # nerfreals[sessionid].put_msg_txt(params['text'])
+            nerfreals[sessionid].start_recording()
+        elif params['type']=='end_record':
+            nerfreals[sessionid].stop_recording()
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )

 async def is_speaking(request):
     params = await request.json()
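`/set_audiotype` and `/record` get the same try/except wrapping. For reference, a hedged sketch exercising both (the `audiotype` and `reinit` values are illustrative placeholders; the `type` strings come straight from the handler above):

```python
# Switch the avatar's custom audio state, then record a short clip of its output.
import time
import requests

BASE = "http://serverip:8010"
SID = {"sessionid": 123456}  # hypothetical session id from /offer

requests.post(f"{BASE}/set_audiotype", json={**SID, "audiotype": 1, "reinit": True})

requests.post(f"{BASE}/record", json={**SID, "type": "start_record"})
time.sleep(5)  # capture five seconds of output
requests.post(f"{BASE}/record", json={**SID, "type": "end_record"})
```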
@@ -342,6 +399,7 @@ if __name__ == '__main__':
     appasync.router.add_post("/humanaudio", humanaudio)
     appasync.router.add_post("/set_audiotype", set_audiotype)
     appasync.router.add_post("/record", record)
+    appasync.router.add_post("/interrupt_talk", interrupt_talk)
     appasync.router.add_post("/is_speaking", is_speaking)
     appasync.router.add_static('/',path='web')
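After this commit every handler answers with the same `{"code": ..., "msg": ...}` envelope: `code` 0 on success, -1 plus an error message on failure, so one checker covers the whole API. `/is_speaking`, registered above, can then be polled to wait out playback; that its boolean rides in a `data` field is an assumption taken from the project's web client:

```python
# Poll /is_speaking until the avatar finishes talking.
import time
import requests

def wait_until_silent(base="http://serverip:8010", sessionid=123456, interval=0.5):
    while True:
        resp = requests.post(f"{base}/is_speaking", json={"sessionid": sessionid}).json()
        if resp.get("code") == 0 and not resp.get("data"):  # "data" field assumed
            return  # avatar is silent
        time.sleep(interval)
```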
