package com.supervision.service.impl;

import cn.hutool.core.codec.Base64;
import com.supervision.dto.dify.ChatResDTO;
import com.supervision.dto.paddlespeech.res.TtsResultDTO;
import com.supervision.dto.robot.AnswerInfo;
import com.supervision.dto.robot.AskInfo;
import com.supervision.dto.robot.RobotTalkDTO;
import com.supervision.model.RobotTalkReq;
import com.supervision.model.dify.DIFYChatReqInputVO;
import com.supervision.model.dify.DifyChatReqVO;
import com.supervision.service.IChatService;
import com.supervision.util.AsrUtil;
import com.supervision.util.DifyApiUtil;
import com.supervision.util.TtsUtil;
import jakarta.annotation.Resource;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.StopWatch;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Voice chat service: runs an uploaded audio clip through speech recognition,
 * sends the recognised text to the Dify chat API, synthesises the answer to
 * speech and keeps both audio clips (Base64-encoded) in an in-memory cache so
 * they can be fetched again by id through {@link #getAudio}.
 */
@Slf4j
@Service
public class ChatServiceImpl implements IChatService {

    @Resource
    private DifyApiUtil difyApiUtil;

    /**
     * Base64 audio keyed by message id (question audio) or generated voice id
     * (answer audio). The map is shared by every request handled by this
     * service instance, hence the concurrent implementation.
     * NOTE(review): entries are never evicted, so the cache grows without
     * bound; consider a size/TTL-bounded cache.
     */
    private final Map<String, String> voiceCache = new ConcurrentHashMap<>();

    /**
     * Handles one voice round-trip: speech-to-text, chat, text-to-speech.
     *
     * @param file         uploaded audio containing the user's question
     * @param robotTalkReq request data; its session id is forwarded to Dify
     *                     as the conversation id
     * @return the question info, the answer info (text plus Base64 audio) and
     *         the conversation id returned by Dify
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *                          processing the request
     */
    @Override
    public RobotTalkDTO talk(MultipartFile file, RobotTalkReq robotTalkReq) {
        log.info("robotTalkReq:{}", robotTalkReq);
        RobotTalkDTO.RobotTalkDTOBuilder builder = RobotTalkDTO.builder();
        try {
            byte[] bytes = file.getBytes();
            StopWatch stopWatch = new StopWatch();

            // The recognition call must run inside the "stt" task, otherwise
            // the task measures nothing and the ASR cost goes unreported.
            stopWatch.start("stt");
            String question = AsrUtil.asrTransformByBytes(bytes);
            stopWatch.stop();

            DifyChatReqVO difyChatReqVO = new DifyChatReqVO();
            difyChatReqVO.setUser("admin");
            difyChatReqVO.setQuery(question);
            difyChatReqVO.setConversation_id(robotTalkReq.getSessionId());

            stopWatch.start("dify");
            ChatResDTO chatResDTO = difyApiUtil.chat(difyChatReqVO);
            stopWatch.stop();
            log.info("response:{}", chatResDTO.getAnswer());

            // Report the recognised text as the question message.
            builder.askInfo(AskInfo.builder()
                    .contentType(2)
                    .message(question)
                    .audioLength(100L)
                    .askId(chatResDTO.getMessage_id())
                    .build());
            cacheVoice(chatResDTO.getMessage_id(), Base64.encode(bytes));

            stopWatch.start("tts");
            TtsResultDTO ttsResultDTO = TtsUtil.ttsTransform(chatResDTO.getAnswer());
            stopWatch.stop();

            String voiceBaseId = UUID.randomUUID().toString();
            builder.answerInfo(AnswerInfo.builder()
                    .contentType(2)
                    .message(chatResDTO.getAnswer())
                    .voiceBaseId(voiceBaseId)
                    .voiceBase64(ttsResultDTO.getAudio())
                    .build());
            builder.sessionId(chatResDTO.getConversation_id());
            cacheVoice(voiceBaseId, ttsResultDTO.getAudio());

            log.info("耗时:{}", stopWatch.prettyPrint());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return builder.build();
    }

    /**
     * Streams a previously cached audio clip, decoded from Base64, to the
     * HTTP response. Responds with 404 when no clip is cached under the id.
     *
     * @param response HTTP response whose output stream receives the audio
     * @param audioId  cache key: a message id or a generated voice id
     * @throws IOException if writing to the response fails
     */
    @Override
    public void getAudio(HttpServletResponse response, String audioId) throws IOException {
        log.info("audioId:{}", audioId);
        // ConcurrentHashMap rejects null keys, so guard the lookup.
        String voiceBase64 = audioId == null ? null : voiceCache.get(audioId);
        if (voiceBase64 == null) {
            log.warn("audio not found, audioId:{}", audioId);
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }
        Base64.decodeToStream(voiceBase64, response.getOutputStream(), false);
    }

    /** Stores a clip in the cache, skipping null ids or payloads (the map rejects nulls). */
    private void cacheVoice(String id, String voiceBase64) {
        if (id == null || voiceBase64 == null) {
            log.warn("skip caching audio, id:{}", id);
            return;
        }
        voiceCache.put(id, voiceBase64);
    }
}