speech-demo-service/src/main/java/com/supervision/service/impl/ChatServiceImpl.java

81 lines
3.2 KiB
Java

package com.supervision.service.impl;
import cn.hutool.core.codec.Base64;
import com.supervision.dto.dify.ChatResDTO;
import com.supervision.dto.paddlespeech.res.TtsResultDTO;
import com.supervision.dto.robot.AnswerInfo;
import com.supervision.dto.robot.AskInfo;
import com.supervision.dto.robot.RobotTalkDTO;
import com.supervision.model.RobotTalkReq;
import com.supervision.model.dify.DIFYChatReqInputVO;
import com.supervision.model.dify.DifyChatReqVO;
import com.supervision.service.IChatService;
import com.supervision.util.AsrUtil;
import com.supervision.util.DifyApiUtil;
import com.supervision.util.TtsUtil;
import jakarta.annotation.Resource;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.StopWatch;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Voice chat service: converts an uploaded audio question to text, sends the text to Dify,
 * synthesizes the answer back to speech, and keeps both audio clips in an in-memory cache so
 * they can be fetched later by id through {@link #getAudio(HttpServletResponse, String)}.
 */
@Slf4j
@Service
public class ChatServiceImpl implements IChatService {

    @Resource
    private DifyApiUtil difyApiUtil;

    /**
     * Base64-encoded audio keyed by audio id (the Dify message id for the question, a random
     * UUID for the answer).
     *
     * <p>This bean's methods read and write the map without any other synchronization, so a
     * concurrent map is used. NOTE(review): entries are never removed, so the cache grows for
     * the lifetime of the process — consider a bounded or expiring cache.
     */
    Map<String, String> voiceCache = new ConcurrentHashMap<>();

    /**
     * Runs one question/answer round trip: speech-to-text, Dify chat, text-to-speech.
     *
     * @param file         uploaded audio containing the spoken question
     * @param robotTalkReq request data; its session id is forwarded to Dify as the conversation id
     * @return the recognized question, the textual and spoken answer, and the conversation id
     *         returned by Dify
     * @throws RuntimeException if the uploaded file's bytes cannot be read
     */
    @Override
    public RobotTalkDTO talk(MultipartFile file, RobotTalkReq robotTalkReq) {
        log.info("robotTalkReq:{}", robotTalkReq);
        RobotTalkDTO.RobotTalkDTOBuilder builder = RobotTalkDTO.builder();
        try {
            byte[] bytes = file.getBytes();
            StopWatch stopWatch = new StopWatch();

            // Speech-to-text. The recognition call must sit between start/stop, otherwise the
            // "stt" task is recorded with (almost) zero duration.
            stopWatch.start("stt");
            String recognizedText = AsrUtil.asrTransformByBytes(bytes);
            stopWatch.stop();

            DifyChatReqVO difyChatReqVO = new DifyChatReqVO();
            difyChatReqVO.setUser("admin");
            difyChatReqVO.setQuery(recognizedText);
            difyChatReqVO.setConversation_id(robotTalkReq.getSessionId());

            stopWatch.start("dify");
            ChatResDTO chatResDTO = difyApiUtil.chat(difyChatReqVO);
            stopWatch.stop();
            log.info("response:{}", chatResDTO.getAnswer());

            // Report the recognized text as the question. audioLength is a hard-coded value,
            // not measured from the upload. NOTE(review): contentType 2 presumably marks a
            // voice message — confirm against the DTO definition.
            builder.askInfo(AskInfo.builder()
                    .contentType(2)
                    .message(recognizedText)
                    .audioLength(100L)
                    .askId(chatResDTO.getMessage_id())
                    .build());
            cacheAudio(chatResDTO.getMessage_id(), Base64.encode(bytes));

            stopWatch.start("tts");
            TtsResultDTO ttsResultDTO = TtsUtil.ttsTransform(chatResDTO.getAnswer());
            stopWatch.stop();

            String voiceBaseId = UUID.randomUUID().toString();
            builder.answerInfo(AnswerInfo.builder()
                    .contentType(2)
                    .message(chatResDTO.getAnswer())
                    .voiceBaseId(voiceBaseId)
                    .voiceBase64(ttsResultDTO.getAudio())
                    .build());
            builder.sessionId(chatResDTO.getConversation_id());
            cacheAudio(voiceBaseId, ttsResultDTO.getAudio());

            log.info("耗时:{}", stopWatch.prettyPrint());
        } catch (IOException e) {
            throw new RuntimeException("Failed to read uploaded audio file", e);
        }
        return builder.build();
    }

    /**
     * Writes the cached audio for {@code audioId} to the response body as raw (Base64-decoded)
     * bytes. Responds with 404 when no audio is cached under that id.
     *
     * @param response servlet response whose output stream receives the audio; it is not closed here
     * @param audioId  id previously returned by {@code talk} (ask id or voice base id)
     * @throws IOException if writing to the response fails
     */
    @Override
    public void getAudio(HttpServletResponse response, String audioId) throws IOException {
        log.info("audioId:{}", audioId);
        // ConcurrentHashMap rejects null keys, so a null id is treated as "not found".
        String audioBase64 = audioId == null ? null : voiceCache.get(audioId);
        if (audioBase64 == null) {
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }
        Base64.decodeToStream(audioBase64, response.getOutputStream(), false);
    }

    /** Stores audio in the cache, skipping null ids/payloads that a concurrent map cannot hold. */
    private void cacheAudio(String audioId, String audioBase64) {
        if (audioId == null || audioBase64 == null) {
            log.warn("skip caching audio, audioId:{}", audioId);
            return;
        }
        voiceCache.put(audioId, audioBase64);
    }
}