You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

66 lines
2.0 KiB
Python

import re
import time
import requests
import json
from basereal import BaseReal
from logger import logger
# Ollama chat endpoint and model used to generate the replies.
_OLLAMA_CHAT_URL = "http://192.168.10.70:11434/api/chat"
_OLLAMA_MODEL = "qwen3:30b-a3b"
# (connect, read) timeouts in seconds, so an unreachable or stalled server
# cannot block the calling thread forever. The read timeout is generous
# because the first token may have to wait for the model to load.
_OLLAMA_TIMEOUT = (10, 300)
# qwen3 may wrap its reasoning in <think> tags; only the tags are stripped.
_THINK_TAG_PATTERN = re.compile(r"</?think>", re.IGNORECASE)
# Characters that end a speakable segment: ASCII marks plus their full-width
# (CJK) counterparts, written as escapes so they survive re-encoding.
_SEGMENT_PUNCTUATION = frozenset(
    ",.!?;:"
    "\uff0c\u3002\uff01\uff1f\uff1b\uff1a"
)
# A segment is handed over only once it is longer than this many characters.
_MIN_SEGMENT_CHARS = 10


def llm_response(message: str, nerfreal: BaseReal):
    """Stream a chat reply from Ollama and feed it to ``nerfreal`` in segments.

    The user message is sent together with a fixed system prompt. The
    newline-delimited JSON stream is read incrementally; text is buffered
    and passed to ``nerfreal.put_msg_txt`` whenever a punctuation mark is
    reached and the buffer is longer than ``_MIN_SEGMENT_CHARS`` characters.
    Whatever remains in the buffer when the stream ends is passed on last.

    Args:
        message: The user's message to answer.
        nerfreal: Receiver of the text segments via ``put_msg_txt``.

    Raises:
        requests.RequestException: If the server cannot be reached or the
            stream stalls longer than ``_OLLAMA_TIMEOUT`` allows.
    """
    start = time.perf_counter()
    logger.info(f"message:{message}")
    data = {
        "model": _OLLAMA_MODEL,
        "messages": [
            {"role": "system", "content": "你叫小苏,贵州人,带有轻微贵州口音,现在正在直播,你的人设是带货主播,你现在卖的产品是酱酒,任务是消除消费者对假酒的顾虑。"},
            {"role": "user", "content": f"{message}\n不可以输出表情包/no_think"},
        ],
        "stream": True,
    }

    result = ""
    full_answer = ""
    first = True

    # The context manager releases the connection even when the loop exits
    # early or a downstream call raises.
    with requests.post(
        _OLLAMA_CHAT_URL, json=data, stream=True, timeout=_OLLAMA_TIMEOUT
    ) as response:
        logger.info(f"Ollama init time: {time.perf_counter() - start:.2f}s")

        # An error reply carries no "message" field, so without this check
        # a failed request would silently produce no speech at all.
        if response.status_code >= 400:
            logger.error(
                f"Ollama request failed: HTTP {response.status_code} "
                f"{response.text[:200]}"
            )
            return

        for line in response.iter_lines():
            if not line:
                continue
            try:
                # ValueError covers both UnicodeDecodeError and
                # json.JSONDecodeError.
                json_data = json.loads(line.decode("utf-8"))
            except ValueError:
                continue
            if not isinstance(json_data, dict):
                continue

            # Ollama can report a failure as an in-stream error object.
            if json_data.get("error"):
                logger.error(f"Ollama stream error: {json_data['error']}")
                break

            msg = (json_data.get("message") or {}).get("content") or ""
            msg = _THINK_TAG_PATTERN.sub("", msg)
            if msg:
                full_answer += msg
                if first:
                    logger.info(
                        f"Ollama time to first chunk: {time.perf_counter() - start:.2f}s"
                    )
                    first = False

                lastpos = 0
                for i, char in enumerate(msg):
                    if char in _SEGMENT_PUNCTUATION:
                        result += msg[lastpos:i + 1]
                        lastpos = i + 1
                        # Short fragments stay buffered so each hand-off
                        # carries a reasonably long piece of text.
                        if len(result) > _MIN_SEGMENT_CHARS:
                            logger.info(result)
                            nerfreal.put_msg_txt(result)
                            result = ""
                result += msg[lastpos:]

            # The final object of the stream is flagged "done"; stop reading
            # instead of waiting for the server to close the connection.
            if json_data.get("done"):
                break

    logger.info(
        f"Ollama time to last chunk: {time.perf_counter() - start:.2f}s, "
        f"answer length: {len(full_answer)}"
    )
    if result:
        nerfreal.put_msg_txt(result)