diff --git a/src/main/java/com/supervision/controller/VoiceController.java b/src/main/java/com/supervision/controller/VoiceController.java
new file mode 100644
index 0000000..f799e16
--- /dev/null
+++ b/src/main/java/com/supervision/controller/VoiceController.java
@@ -0,0 +1,53 @@
+package com.supervision.controller;
+
+import com.supervision.service.VoiceService;
+import io.swagger.annotations.Api;
+import io.swagger.annotations.ApiOperation;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+
+/**
+ * HTTP endpoints for speech-to-text and text-to-speech; both delegate to {@link VoiceService}.
+ * {@code @RestController} is required: {@code @RequestMapping} alone does not register the class as a bean.
+ */
+@Slf4j
+@Api(tags = "音频处理")
+@RestController
+@RequestMapping("voice")
+@RequiredArgsConstructor
+public class VoiceController {
+
+    private final VoiceService voiceService;
+
+    /**
+     * Transcribes an uploaded audio file.
+     *
+     * @param file      uploaded audio file
+     * @param sessionId session identifier, forwarded to the service unchanged
+     * @return the text returned by the service
+     * @throws IOException if the service fails to read the uploaded file
+     */
+    @ApiOperation("语音转文字")
+    @PostMapping("voiceToText")
+    public String voiceToText(MultipartFile file, String sessionId) throws IOException {
+        return voiceService.voiceToText(file, sessionId);
+    }
+
+    /**
+     * Synthesises speech for the given text.
+     *
+     * @param text text to convert
+     * @return the string returned by the service
+     */
+    @ApiOperation("文字转语音")
+    @PostMapping("textToVoice")
+    public String textToVoice(String text) {
+        return voiceService.textToVoice(text);
+    }
+}
diff --git a/src/main/java/com/supervision/dto/paddlespeech/req/AsrReqDTO.java b/src/main/java/com/supervision/dto/paddlespeech/req/AsrReqDTO.java
new file mode 100644
index 0000000..b593b72
--- /dev/null
+++ b/src/main/java/com/supervision/dto/paddlespeech/req/AsrReqDTO.java
@@ -0,0 +1,25 @@
+package com.supervision.dto.paddlespeech.req;
+
+import lombok.Data;
+
+/**
+ * Request body posted to the ASR endpoint; everything except {@code audio} is a fixed default.
+ * NOTE(review): the snake_case field names presumably mirror the JSON keys the service expects - confirm.
+ */
+@Data
+public class AsrReqDTO {
+
+    private final String audio;
+
+    private final String audio_format = "wav";
+
+    private final Integer sample_rate = 16000;
+
+    private final String lang = "zh_cn";
+
+    private final Integer punc = 0;
+
+    public AsrReqDTO(String audio) {
+        this.audio = audio;
+    }
+}
diff --git a/src/main/java/com/supervision/dto/paddlespeech/req/TtsReqDTO.java b/src/main/java/com/supervision/dto/paddlespeech/req/TtsReqDTO.java
new file mode 100644
index 0000000..4ba21b6
--- /dev/null
+++ b/src/main/java/com/supervision/dto/paddlespeech/req/TtsReqDTO.java
@@ -0,0 +1,26 @@
+package com.supervision.dto.paddlespeech.req;
+
+import lombok.Data;
+
+/**
+ * Request body posted to the TTS endpoint; everything except {@code text} is a fixed default.
+ */
+@Data
+public class TtsReqDTO {
+
+    private final String text;
+
+    private final Integer spk_id = 0;
+
+    private final Double speed = 1.0;
+
+    private final Double volume = 1.0;
+
+    private final Integer sample_rate = 16000;
+
+    private final String save_path = "./tts.wav";
+
+    public TtsReqDTO(String text) {
+        this.text = text;
+    }
+}
diff --git a/src/main/java/com/supervision/dto/paddlespeech/res/AsrResultDTO.java b/src/main/java/com/supervision/dto/paddlespeech/res/AsrResultDTO.java
new file mode 100644
index 0000000..4942166
--- /dev/null
+++ b/src/main/java/com/supervision/dto/paddlespeech/res/AsrResultDTO.java
@@ -0,0 +1,13 @@
+package com.supervision.dto.paddlespeech.res;
+
+import lombok.Data;
+
+/**
+ * {@code result} payload of an ASR response.
+ */
+@Data
+public class AsrResultDTO {
+
+    private String transcription;
+
+}
diff --git a/src/main/java/com/supervision/dto/paddlespeech/res/PaddleSpeechResDTO.java b/src/main/java/com/supervision/dto/paddlespeech/res/PaddleSpeechResDTO.java
new file mode 100644
index 0000000..3caca10
--- /dev/null
+++ b/src/main/java/com/supervision/dto/paddlespeech/res/PaddleSpeechResDTO.java
@@ -0,0 +1,20 @@
+package com.supervision.dto.paddlespeech.res;
+
+import lombok.Data;
+
+/**
+ * Common envelope of a PaddleSpeech server response.
+ *
+ * @param <T> type of the {@code result} payload
+ */
+@Data
+public class PaddleSpeechResDTO<T> {
+
+    private Boolean success;
+
+    private Integer code;
+
+    private Object message;
+
+    private T result;
+}
diff --git a/src/main/java/com/supervision/dto/paddlespeech/res/TtsResultDTO.java b/src/main/java/com/supervision/dto/paddlespeech/res/TtsResultDTO.java
new file mode 100644
index 0000000..f329050
--- /dev/null
+++ b/src/main/java/com/supervision/dto/paddlespeech/res/TtsResultDTO.java
@@ -0,0 +1,26 @@
+package com.supervision.dto.paddlespeech.res;
+
+import lombok.Data;
+
+/**
+ * {@code result} payload of a TTS response.
+ */
+@Data
+public class TtsResultDTO {
+
+    private String lang;
+
+    private String spk_id;
+
+    private String speed;
+
+    private String volume;
+
+    private String sample_rate;
+
+    private String duration;
+
+    private String save_path;
+
+    private String audio;
+}
diff --git a/src/main/java/com/supervision/service/VoiceService.java b/src/main/java/com/supervision/service/VoiceService.java
new file mode 100644
index 0000000..73b6d2e
--- /dev/null
+++ b/src/main/java/com/supervision/service/VoiceService.java
@@ -0,0 +1,30 @@
+package com.supervision.service;
+
+import com.supervision.vo.voice.VoiceReqVO;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+
+/**
+ * Speech recognition and speech synthesis operations.
+ */
+public interface VoiceService {
+
+    /**
+     * Converts the uploaded audio to text.
+     *
+     * @param file      uploaded audio file
+     * @param sessionId session identifier
+     * @return the recognised text
+     * @throws IOException if the uploaded file cannot be read
+     */
+    String voiceToText(MultipartFile file, String sessionId) throws IOException;
+
+    /**
+     * Converts text to speech.
+     *
+     * @param text text to synthesise
+     * @return the {@code audio} value of the TTS response
+     */
+    String textToVoice(String text);
+}
diff --git a/src/main/java/com/supervision/service/impl/VoiceServiceImpl.java b/src/main/java/com/supervision/service/impl/VoiceServiceImpl.java
new file mode 100644
index 0000000..8a8b28e
--- /dev/null
+++ b/src/main/java/com/supervision/service/impl/VoiceServiceImpl.java
@@ -0,0 +1,40 @@
+package com.supervision.service.impl;
+
+import cn.hutool.core.date.DateTime;
+import cn.hutool.core.date.DateUnit;
+import cn.hutool.core.date.DateUtil;
+import com.supervision.service.VoiceService;
+import com.supervision.util.AsrUtil;
+import com.supervision.util.TtsUtil;
+import com.supervision.vo.voice.VoiceReqVO;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.IOException;
+
+/**
+ * {@link VoiceService} implementation backed by {@link AsrUtil} and {@link TtsUtil}.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class VoiceServiceImpl implements VoiceService {
+
+    /**
+     * Reads the uploaded bytes and passes them to {@link AsrUtil}.
+     * {@code sessionId} is accepted but not used by this implementation.
+     */
+    @Override
+    public String voiceToText(MultipartFile file, String sessionId) throws IOException {
+        log.info("语音转文字开始");
+        return AsrUtil.asrTransformByBytes(file.getBytes());
+    }
+
+    @Override
+    public String textToVoice(String text) {
+        log.info("文字转语音开始");
+        return TtsUtil.ttsTransform(text);
+    }
+}
diff --git a/src/main/java/com/supervision/util/AsrUtil.java b/src/main/java/com/supervision/util/AsrUtil.java
new file mode 100644
index 0000000..2172cd4
--- /dev/null
+++ b/src/main/java/com/supervision/util/AsrUtil.java
@@ -0,0 +1,61 @@
+package com.supervision.util;
+
+import cn.hutool.core.codec.Base64;
+import cn.hutool.core.util.ObjectUtil;
+import cn.hutool.http.HttpUtil;
+import cn.hutool.json.JSONUtil;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.supervision.dto.paddlespeech.req.AsrReqDTO;
+import com.supervision.dto.paddlespeech.res.AsrResultDTO;
+import com.supervision.dto.paddlespeech.res.PaddleSpeechResDTO;
+import com.supervision.exception.BusinessException;
+import org.springframework.core.env.Environment;
+
+/**
+ * Static client for the PaddleSpeech ASR (speech-to-text) endpoint configured as {@code paddle-speech.asr}.
+ */
+public class AsrUtil {
+
+    private static final String ASR_URL = SpringBeanUtil.getBean(Environment.class).getProperty("paddle-speech.asr");
+
+    private static final ObjectMapper objectMapper = new ObjectMapper();
+
+    /**
+     * Speech to text for raw audio bytes.
+     *
+     * @param bytes raw audio content
+     * @return the transcription from the ASR response
+     * @throws BusinessException if the response cannot be parsed, reports failure, or has no result
+     */
+    public static String asrTransformByBytes(byte[] bytes) {
+        // The request carries the audio as a Base64 string.
+        String encode = Base64.encode(bytes);
+        return asrTransformByBytes(encode);
+    }
+
+    /**
+     * Speech to text for audio that is already Base64-encoded.
+     *
+     * @param voiceBase64 Base64-encoded audio
+     * @return the transcription from the ASR response
+     * @throws BusinessException if the response cannot be parsed, reports failure, or has no result
+     */
+    public static String asrTransformByBytes(String voiceBase64) {
+        // Call the Python service to convert the speech to text.
+        String post = HttpUtil.post(ASR_URL, JSONUtil.toJsonStr(new AsrReqDTO(voiceBase64)));
+        PaddleSpeechResDTO<AsrResultDTO> response;
+        try {
+            response = objectMapper.readValue(post, new TypeReference<PaddleSpeechResDTO<AsrResultDTO>>() {
+            });
+        } catch (Exception e) {
+            // Keep the parse error as the cause so the root problem stays visible in logs.
+            throw new BusinessException("语音转换文字失败", e);
+        }
+        // Boolean.TRUE.equals treats a missing "success" value as a failure instead of throwing a NullPointerException.
+        if (response == null || !Boolean.TRUE.equals(response.getSuccess()) || ObjectUtil.isEmpty(response.getResult())) {
+            throw new BusinessException("语音转换文字失败");
+        }
+        return response.getResult().getTranscription();
+    }
+}
diff --git a/src/main/java/com/supervision/util/TtsUtil.java b/src/main/java/com/supervision/util/TtsUtil.java
new file mode 100644
index 0000000..2cff1e6
--- /dev/null
+++ b/src/main/java/com/supervision/util/TtsUtil.java
@@ -0,0 +1,47 @@
+package com.supervision.util;
+
+import cn.hutool.core.util.ObjectUtil;
+import cn.hutool.http.HttpUtil;
+import cn.hutool.json.JSONUtil;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.supervision.dto.paddlespeech.req.TtsReqDTO;
+import com.supervision.dto.paddlespeech.res.PaddleSpeechResDTO;
+import com.supervision.dto.paddlespeech.res.TtsResultDTO;
+import com.supervision.exception.BusinessException;
+import org.springframework.core.env.Environment;
+
+/**
+ * Static client for the PaddleSpeech TTS (text-to-speech) endpoint configured as {@code paddle-speech.tts}.
+ */
+public class TtsUtil {
+
+    private static final String TTS_URL = SpringBeanUtil.getBean(Environment.class).getProperty("paddle-speech.tts");
+
+    private static final ObjectMapper objectMapper = new ObjectMapper();
+
+    /**
+     * Text to speech.
+     *
+     * @param str text to synthesise
+     * @return the {@code audio} value of the TTS response
+     * @throws BusinessException if the response cannot be parsed, reports failure, or has no result
+     */
+    public static String ttsTransform(String str) {
+        // Build the request and post it to the TTS endpoint.
+        String post = HttpUtil.post(TTS_URL, JSONUtil.toJsonStr(new TtsReqDTO(str)));
+        PaddleSpeechResDTO<TtsResultDTO> response;
+        try {
+            response = objectMapper.readValue(post, new TypeReference<PaddleSpeechResDTO<TtsResultDTO>>() {
+            });
+        } catch (Exception e) {
+            // This is the text-to-speech path, so report it as such and keep the cause.
+            throw new BusinessException("文字转换语音失败", e);
+        }
+        // Boolean.TRUE.equals treats a missing "success" value as a failure instead of throwing a NullPointerException.
+        if (response == null || !Boolean.TRUE.equals(response.getSuccess()) || ObjectUtil.isEmpty(response.getResult())) {
+            throw new BusinessException("文字转换语音失败");
+        }
+        return response.getResult().getAudio();
+    }
+}
diff --git a/src/main/java/com/supervision/vo/voice/VoiceReqVO.java b/src/main/java/com/supervision/vo/voice/VoiceReqVO.java
new file mode 100644
index 0000000..fa8dbbb
--- /dev/null
+++ b/src/main/java/com/supervision/vo/voice/VoiceReqVO.java
@@ -0,0 +1,14 @@
+package com.supervision.vo.voice;
+
+import lombok.Data;
+
+/**
+ * Request object holding a {@code voiceBase64} string and a {@code sessionId}.
+ */
+@Data
+public class VoiceReqVO {
+
+    private String voiceBase64;
+
+    private String sessionId;
+}
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
index 593bd19..35055c9 100644
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@@ -44,6 +44,10 @@ spring:
 matchTool:
   url: http://192.168.10.29:8000
   scoreThreshold: 0.4
+paddle-speech:
+  # https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-RESTful-API
+  tts: http://192.168.10.137:8090/paddlespeech/tts
+  asr: http://192.168.10.137:8090/paddlespeech/asr
 mybatis-plus:
   mapper-locations: classpath*:mapper/**/*.xml
   configuration: