import json
import re
import time

import requests

from basereal import BaseReal
from logger import logger

# Endpoint and model used for the streaming chat request.
_OLLAMA_CHAT_URL = "http://192.168.10.70:11434/api/chat"
_OLLAMA_MODEL = "qwen3:30b-a3b"
_SYSTEM_PROMPT = "你叫小苏,贵州人,带有轻微贵州口音,现在正在直播,你的人设是带货主播,你现在卖的产品是酱酒,任务是消除消费者对假酒的顾虑。"

# (connect, read) timeouts in seconds. The read timeout bounds the wait
# between streamed chunks, so it is kept generous.
_REQUEST_TIMEOUT = (10, 300)

# Matches opening/closing think tags so they can be removed from streamed
# content. An empty pattern here would make the substitution a no-op.
# NOTE(review): assumes the model emits literal <think>/</think> markers
# inside `message.content` -- confirm against the deployed server version.
_THINK_TAG_PATTERN = re.compile(r"</?think>", re.IGNORECASE)

# Characters that end a speakable segment: ASCII marks plus their
# full-width forms, since the prompt asks for Chinese output.
_SENTENCE_PUNCTUATION = frozenset(",.!?;:。，！？；：")

# A buffered segment is forwarded only once it is longer than this.
_MIN_FLUSH_LEN = 10


def _emit_complete_segments(pending: str, chunk: str, nerfreal: BaseReal) -> str:
    """Merge *chunk* into *pending*, forwarding text at punctuation marks.

    Each time a punctuation character is reached, the text up to and
    including it is appended to the buffer; if the buffer is then longer
    than ``_MIN_FLUSH_LEN`` it is logged, passed to
    ``nerfreal.put_msg_txt`` and cleared.

    Returns:
        The text still buffered after processing *chunk*.
    """
    last_pos = 0
    for i, char in enumerate(chunk):
        if char not in _SENTENCE_PUNCTUATION:
            continue
        pending += chunk[last_pos:i + 1]
        last_pos = i + 1
        if len(pending) > _MIN_FLUSH_LEN:
            logger.info(pending)
            nerfreal.put_msg_txt(pending)
            pending = ""
    # Keep the trailing text that has not reached a punctuation mark yet.
    return pending + chunk[last_pos:]


def llm_response(message: str, nerfreal: BaseReal):
    """Stream a chat completion and forward it to *nerfreal* in segments.

    Posts *message* (with a fixed system prompt) to the chat endpoint with
    streaming enabled, reads the newline-delimited JSON reply, and passes
    punctuation-delimited pieces of the answer to ``nerfreal.put_msg_txt``
    as they become available. Any text left over when the stream ends is
    forwarded as a final piece.

    Args:
        message: The user's text; sent as the ``user`` message.
        nerfreal: Receiver of the text segments via ``put_msg_txt``.

    Raises:
        requests.RequestException: If the connection fails or times out.
    """
    start = time.perf_counter()
    logger.info(f"message:{message}")

    payload = {
        "model": _OLLAMA_MODEL,
        "messages": [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": f"{message}\n不可以输出表情包/no_think"},
        ],
        "stream": True,
    }

    pending = ""
    first = True

    # The context manager releases the streamed connection even if the loop
    # exits early or raises.
    with requests.post(
        _OLLAMA_CHAT_URL, json=payload, stream=True, timeout=_REQUEST_TIMEOUT
    ) as response:
        logger.info(f"Ollama init time: {time.perf_counter() - start:.2f}s")

        if not response.ok:
            # Surface server-side failures instead of silently producing
            # no output.
            logger.error(
                f"Ollama request failed: HTTP {response.status_code} "
                f"{response.text[:200]}"
            )
            return

        for line in response.iter_lines():
            if not line:
                continue
            try:
                json_data = json.loads(line.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Skip malformed lines rather than aborting the stream.
                continue
            if not isinstance(json_data, dict):
                continue

            if json_data.get("error"):
                logger.error(f"Ollama error: {json_data['error']}")

            # `message` or `content` may be missing or null.
            content = (json_data.get("message") or {}).get("content") or ""
            msg = _THINK_TAG_PATTERN.sub("", content)
            if msg:
                if first:
                    logger.info(
                        f"Ollama time to first chunk: {time.perf_counter() - start:.2f}s"
                    )
                    first = False
                pending = _emit_complete_segments(pending, msg, nerfreal)

            # Stop on the end-of-stream marker instead of waiting for the
            # server to close the connection.
            if json_data.get("done"):
                break

    if pending:
        nerfreal.put_msg_txt(pending)