From 7ecdb5f3bcb2c4bb158c340c7c9f3d27701def93 Mon Sep 17 00:00:00 2001
From: lipku <lipku@foxmail.com>
Date: Sun, 22 Jun 2025 19:56:02 +0800
Subject: [PATCH] Improve the HTTP API: JSON error responses, add /interrupt_talk

---
 README.md |   9 ++--
 app.py    | 138 ++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 104 insertions(+), 43 deletions(-)
diff --git a/README.md b/README.md
index 7d80564..de14ebe 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
  [English](./README-EN.md) | 中文版   
  实时交互流式数字人，实现音视频同步对话。基本可以达到商用效果
-[wav2lip效果](https://www.bilibili.com/video/BV1scwBeyELA/) | [ernerf效果](https://www.bilibili.com/video/BV1G1421z73r/) | [musetalk效果](https://www.bilibili.com/video/BV1gm421N7vQ/)
+[wav2lip效果](https://www.bilibili.com/video/BV1scwBeyELA/) | [ernerf效果](https://www.bilibili.com/video/BV1G1421z73r/) | [musetalk效果](https://www.bilibili.com/video/BV1gm421N7vQ/)  
+国内镜像地址:<https://gitee.com/lipku/LiveTalking> 
 
 ## 为避免与3d数字人混淆，原项目metahuman-stream改名为livetalking，原有链接地址继续可用
 
@@ -54,9 +55,10 @@ GoogleDriver <https://drive.google.com/drive/folders/1FOC_MD6wdogyyX_7V1d4NDIO7P
 将wav2lip256_avatar1.tar.gz解压后整个文件夹拷到本项目的data/avatars下
 - 运行  
 python app.py --transport webrtc --model wav2lip --avatar_id wav2lip256_avatar1  
-用浏览器打开http://serverip:8010/webrtcapi.html , 先点‘start',播放数字人视频；然后在文本框输入任意文字，提交。数字人播报该段文字  
 <font color=red>服务端需要开放端口 tcp:8010; udp:1-65536 </font>  
-如果需要商用高清wav2lip模型，[链接](https://livetalking-doc.readthedocs.io/zh-cn/latest/service.html#wav2lip) 
+客户端可以选用以下两种方式:  
+(1)用浏览器打开http://serverip:8010/webrtcapi.html , 先点'start',播放数字人视频；然后在文本框输入任意文字，提交。数字人播报该段文字  
+(2)用客户端方式, 下载地址<https://pan.quark.cn/s/d7192d8ac19b>   
 
 - 快速体验  
 <https://www.compshare.cn/images/4458094e-a43d-45fe-9b57-de79253befe4?referral_code=3XW3852OBmnD089hMMrtuU&ytag=GPU_GitHub_livetalking> 用该镜像创建实例即可运行成功
@@ -108,6 +110,7 @@ wav2lip256显卡3060以上即可，musetalk需要3080Ti以上。
 5. 动作编排：不说话时动作、唤醒时动作、思考时动作、进入休眠动作
 6. 支持不限时长的数字人形象avatar
 7. 提供实时音频流输入接口
+8. 数字人透明背景，能叠加动态背景
 
 ## 7. 声明
 基于本项目开发并发布在B站、视频号、抖音等网站上的视频需带上LiveTalking水印和标识，如需去除请联系作者备案授权。
diff --git a/app.py b/app.py
index 855dd4d..4ef5a43 100644
--- a/app.py
+++ b/app.py
@@ -45,6 +45,7 @@ import asyncio
 import torch
 from typing import Dict
 from logger import logger
+import gc
 
 
 app = Flask(__name__)
@@ -87,7 +88,12 @@ async def offer(request):
 
     if len(nerfreals) >= opt.max_session:
         logger.info('reach max session')
-        return -1
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": "reach max session"}
+            ),
+        )
     sessionid = randN(6) #len(nerfreals)
     logger.info('sessionid=%d',sessionid)
     nerfreals[sessionid] = None
@@ -109,6 +115,7 @@ async def offer(request):
         if pc.connectionState == "closed":
             pcs.discard(pc)
             del nerfreals[sessionid]
+            gc.collect()
 
     player = HumanPlayer(nerfreals[sessionid])
     audio_sender = pc.addTrack(player.audio)
@@ -135,24 +142,55 @@ async def offer(request):
     )
 
 async def human(request):
-    params = await request.json()
+    try:
+        params = await request.json()
 
-    sessionid = params.get('sessionid',0)
-    if params.get('interrupt'):
-        nerfreals[sessionid].flush_talk()
+        sessionid = params.get('sessionid',0)
+        if params.get('interrupt'):
+            nerfreals[sessionid].flush_talk()
 
-    if params['type']=='echo':
-        nerfreals[sessionid].put_msg_txt(params['text'])
-    elif params['type']=='chat':
-        res=await asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])                         
-        #nerfreals[sessionid].put_msg_txt(res)
+        if params['type']=='echo':
+            nerfreals[sessionid].put_msg_txt(params['text'])
+        elif params['type']=='chat':
+            asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])                         
+            #nerfreals[sessionid].put_msg_txt(res)
 
-    return web.Response(
-        content_type="application/json",
-        text=json.dumps(
-            {"code": 0, "data":"ok"}
-        ),
-    )
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )
+
+async def interrupt_talk(request):
+    try:
+        params = await request.json()
+
+        sessionid = params.get('sessionid',0)
+        nerfreals[sessionid].flush_talk()
+        
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )
 
 async def humanaudio(request):
     try:
@@ -170,41 +208,60 @@ async def humanaudio(request):
             ),
         )
     except Exception as e:
+        logger.exception('exception:')
         return web.Response(
             content_type="application/json",
             text=json.dumps(
-                {"code": -1, "msg":"err","data": ""+e.args[0]+""}
+                {"code": -1, "msg": str(e)}
             ),
         )
 
 async def set_audiotype(request):
-    params = await request.json()
+    try:
+        params = await request.json()
 
-    sessionid = params.get('sessionid',0)    
-    nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit'])
+        sessionid = params.get('sessionid',0)    
+        nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit'])
 
-    return web.Response(
-        content_type="application/json",
-        text=json.dumps(
-            {"code": 0, "data":"ok"}
-        ),
-    )
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )
 
 async def record(request):
-    params = await request.json()
-
-    sessionid = params.get('sessionid',0)
-    if params['type']=='start_record':
-        # nerfreals[sessionid].put_msg_txt(params['text'])
-        nerfreals[sessionid].start_recording()
-    elif params['type']=='end_record':
-        nerfreals[sessionid].stop_recording()
-    return web.Response(
-        content_type="application/json",
-        text=json.dumps(
-            {"code": 0, "data":"ok"}
-        ),
-    )
+    try:
+        params = await request.json()
+
+        sessionid = params.get('sessionid',0)
+        if params['type']=='start_record':
+            # nerfreals[sessionid].put_msg_txt(params['text'])
+            nerfreals[sessionid].start_recording()
+        elif params['type']=='end_record':
+            nerfreals[sessionid].stop_recording()
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": 0, "msg":"ok"}
+            ),
+        )
+    except Exception as e:
+        logger.exception('exception:')
+        return web.Response(
+            content_type="application/json",
+            text=json.dumps(
+                {"code": -1, "msg": str(e)}
+            ),
+        )
 
 async def is_speaking(request):
     params = await request.json()
@@ -342,6 +399,7 @@ if __name__ == '__main__':
     appasync.router.add_post("/humanaudio", humanaudio)
     appasync.router.add_post("/set_audiotype", set_audiotype)
     appasync.router.add_post("/record", record)
+    appasync.router.add_post("/interrupt_talk", interrupt_talk)
     appasync.router.add_post("/is_speaking", is_speaking)
     appasync.router.add_static('/',path='web')