# LiveTalking/llm.py

import re
import time
import requests
import json
from basereal import BaseReal
from logger import logger

# Strips the <think>/</think> markers the model may emit around its reasoning.
_THINK_TAG_PATTERN = re.compile(r"</?think>", re.IGNORECASE)
# Characters (ASCII and full-width) that close a segment of streamed text.
_SEGMENT_PUNCTUATION = frozenset(",.!;:，。！？：；")
# A buffered segment is forwarded once it is longer than this many characters.
_MIN_SEGMENT_LENGTH = 10
# (connect, read) timeouts in seconds. The read timeout bounds the wait between
# two streamed chunks, so it is generous enough to cover slow first tokens.
_OLLAMA_TIMEOUT = (10, 300)


def llm_response(message: str, nerfreal: BaseReal) -> None:
    """Stream a chat completion from Ollama and forward it in text segments.

    The user message is sent to the Ollama ``/api/chat`` endpoint with
    streaming enabled. Each streamed chunk has its think tags removed and is
    appended to a buffer; whenever a punctuation character is seen and the
    buffer is longer than ``_MIN_SEGMENT_LENGTH`` characters, the buffer is
    passed to ``nerfreal.put_msg_txt`` and cleared. Any remaining text is
    forwarded once the stream ends.

    Args:
        message: The user's message text.
        nerfreal: Object whose ``put_msg_txt`` receives each text segment.

    Raises:
        requests.RequestException: Propagated unchanged when the connection
            fails or a timeout expires.
    """
    start = time.perf_counter()
    logger.info(f"message:{message}")
    url = "http://192.168.10.70:11434/api/chat"
    data = {
        "model": "qwen3:30b-a3b",
        "messages": [
            {"role": "system", "content": "你叫小苏，贵州人，带有轻微贵州口音,现在正在直播，你的人设是带货主播，你现在卖的产品是酱酒，任务是消除消费者对假酒的顾虑。"},
            {"role": "user", "content": f"{message}\n不可以输出表情包/no_think"}
        ],
        "stream": True
    }

    pending = ""
    first = True

    # The context manager guarantees the streamed connection is released even
    # when we stop reading early or an exception interrupts the loop.
    with requests.post(url, json=data, stream=True, timeout=_OLLAMA_TIMEOUT) as response:
        logger.info(f"Ollama init time: {time.perf_counter() - start:.2f}s")

        if not response.ok:
            # An error body carries no message content, so nothing would be
            # forwarded anyway; log it instead of failing silently.
            logger.error(f"Ollama request failed: HTTP {response.status_code} {response.text}")
            return

        for line in response.iter_lines():
            if not line:
                continue

            try:
                json_data = json.loads(line.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                continue
            if not isinstance(json_data, dict):
                continue

            if "error" in json_data:
                logger.error(f"Ollama error: {json_data['error']}")
                break

            # `or` guards against explicit nulls as well as missing keys.
            msg = (json_data.get("message") or {}).get("content") or ""
            msg = _THINK_TAG_PATTERN.sub("", msg)

            if msg:
                if first:
                    logger.info(f"Ollama time to first chunk: {time.perf_counter() - start:.2f}s")
                    first = False

                lastpos = 0
                for i, char in enumerate(msg):
                    if char not in _SEGMENT_PUNCTUATION:
                        continue
                    pending += msg[lastpos:i + 1]
                    lastpos = i + 1
                    # Short fragments stay buffered so that segments handed to
                    # the consumer are not tiny.
                    if len(pending) > _MIN_SEGMENT_LENGTH:
                        logger.info(pending)
                        nerfreal.put_msg_txt(pending)
                        pending = ""
                pending += msg[lastpos:]

            # Stop as soon as the server marks the stream complete instead of
            # waiting for the socket to close.
            if json_data.get("done"):
                break

    # Forward whatever is left after the last punctuation mark.
    if pending:
        logger.info(pending)
        nerfreal.put_msg_txt(pending)