|
|
|
@ -1,16 +1,20 @@
|
|
|
|
|
package com.supervision.utils;
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.collection.CollUtil;
|
|
|
|
|
import cn.hutool.core.lang.Assert;
|
|
|
|
|
import cn.hutool.core.lang.Pair;
|
|
|
|
|
import cn.hutool.core.util.StrUtil;
|
|
|
|
|
import cn.hutool.http.HttpUtil;
|
|
|
|
|
import cn.hutool.json.JSONObject;
|
|
|
|
|
import cn.hutool.json.JSONUtil;
|
|
|
|
|
import com.google.gson.Gson;
|
|
|
|
|
import com.google.gson.JsonObject;
|
|
|
|
|
import com.google.gson.JsonParser;
|
|
|
|
|
import com.supervision.minio.domain.MinioFile;
|
|
|
|
|
import com.supervision.minio.service.MinioService;
|
|
|
|
|
import com.supervision.police.domain.ModelCase;
|
|
|
|
|
import com.supervision.police.dto.RecordFileDTO;
|
|
|
|
|
import com.supervision.police.service.FileOcrProcessService;
|
|
|
|
|
import com.supervision.police.vo.dify.DatasetReqVO;
|
|
|
|
|
import com.supervision.police.vo.dify.DatasetResVO;
|
|
|
|
|
import com.supervision.police.vo.dify.DifyChatReqVO;
|
|
|
|
@ -50,6 +54,9 @@ public class DifyApiUtil {
|
|
|
|
|
@Autowired
|
|
|
|
|
private MinioService minioService;
|
|
|
|
|
|
|
|
|
|
@Autowired
|
|
|
|
|
private FileOcrProcessService fileOcrProcessService;
|
|
|
|
|
|
|
|
|
|
public DifyChatResVO chat(DifyChatReqVO difyChatReqVO) {
|
|
|
|
|
DifyChatResVO difyChatResVO = null;
|
|
|
|
|
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
|
|
|
|
@ -200,6 +207,45 @@ public class DifyApiUtil {
|
|
|
|
|
}
|
|
|
|
|
return id;
|
|
|
|
|
}
|
|
|
|
|
public String createDocumentByText(String datasetId,String fileId,String fileName,String documentContent){
|
|
|
|
|
|
|
|
|
|
Assert.notEmpty(fileName, "文件名称不能为空");
|
|
|
|
|
if (StrUtil.isNotEmpty(fileId)){
|
|
|
|
|
fileName = generateDocumentName(fileName, fileId);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return createDocumentByText(datasetId,fileName,documentContent);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public String createDocumentByText(String datasetId,String documentName,String documentContent) {
|
|
|
|
|
// 创建文档url
|
|
|
|
|
String url = difyUrl + METHOD_DATASET + "/" + datasetId + METHOD_DOCUMENT + METHOD_CREATE_BY_TEXT;
|
|
|
|
|
|
|
|
|
|
cn.hutool.http.HttpRequest request = HttpUtil.createPost(url);
|
|
|
|
|
request.header(HttpHeaders.AUTHORIZATION, difyDatasetAuth);
|
|
|
|
|
|
|
|
|
|
Map<String, Object> body = new HashMap<>();
|
|
|
|
|
body.put("indexing_technique","high_quality");// 索引方式:高质量
|
|
|
|
|
body.put("process_rule",Map.of("mode","automatic"));//处理规则:自动
|
|
|
|
|
body.put("name",documentName);// 文档名称
|
|
|
|
|
body.put("text",documentContent);//文档内容
|
|
|
|
|
request.body(JSONUtil.toJsonStr(body));
|
|
|
|
|
|
|
|
|
|
try (cn.hutool.http.HttpResponse response = request.execute()) {
|
|
|
|
|
if (!response.isOk()) {
|
|
|
|
|
log.error("创建知识库文档失败!状态码:{},dify服务端返回内容:{}", response.getStatus(),response.body());
|
|
|
|
|
throw new RuntimeException("创建知识库文档失败!");
|
|
|
|
|
}
|
|
|
|
|
JSONObject document = JSONUtil.parseObj(response.body()).getJSONObject("document");
|
|
|
|
|
if (document == null){
|
|
|
|
|
log.error("创建知识库文档失败!dify服务端返回内容:{}",response.body());
|
|
|
|
|
throw new RuntimeException("创建知识库文档失败!");
|
|
|
|
|
}
|
|
|
|
|
log.info("创建知识库文档成功!ID:{}", document.get("id"));
|
|
|
|
|
return document.get("id").toString();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 删除知识库文档
|
|
|
|
@ -257,28 +303,26 @@ public class DifyApiUtil {
|
|
|
|
|
return documents;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void uploadCaseFileToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds) {
|
|
|
|
|
public void uploadCaseFileToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds,List<Document> documents) {
|
|
|
|
|
|
|
|
|
|
if (StrUtil.isEmpty(modelCase.getKnowledgeBaseId())){
|
|
|
|
|
log.warn("uploadRecordFileToKnowledgeBase:案件:{}案件或者知识库为空,不进行知识库维护!", modelCase.getCaseName());
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<Document> documents = this.queryDocuments(modelCase.getKnowledgeBaseId());
|
|
|
|
|
Set<String> documentFileIds = documents.stream().map(Document::getFileId).collect(Collectors.toSet());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 只上传 doc docx、txt、md、pdf 文件,且文件大小不能超过15mb
|
|
|
|
|
List<MinioFile> minioFiles = minioService.listMinioFile(allFileIds);
|
|
|
|
|
List<String> recordFileIds = minioFiles.stream().filter(minioFile -> {
|
|
|
|
|
boolean currentFileSize = minioFile.getSize() < 15 * 1024 * 1024;
|
|
|
|
|
if (!currentFileSize) {
|
|
|
|
|
log.warn("文件大小超过15mb,不进行知识库维护:{}", minioFile.getFilename());
|
|
|
|
|
log.warn("文件大小超过15mb,不进行文件知识库维护:{}", minioFile.getFilename());
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
boolean currentFileType = StrUtil.equalsAny(minioFile.getFileType(), "doc", "docx", "txt", "md", "pdf");
|
|
|
|
|
if (!currentFileType) {
|
|
|
|
|
log.warn("文件:{} 文件类型非doc、docx、txt、md、pdf,不进行知识库维护...", minioFile.getFilename());
|
|
|
|
|
log.warn("文件:{} 文件类型非doc、docx、txt、md、pdf,不进行文件知识库维护...", minioFile.getFilename());
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
@ -295,19 +339,87 @@ public class DifyApiUtil {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log.info("案件:{}上传笔录文件到知识库完成!",modelCase.getCaseName());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 同步案件所有文件到知识库
|
|
|
|
|
* @param modelCase 案件信息
|
|
|
|
|
* @param allFileIds 案件文件id列表
|
|
|
|
|
*/
|
|
|
|
|
public void syncCaseFileToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds){
|
|
|
|
|
|
|
|
|
|
List<Document> documents = this.queryDocuments(modelCase.getKnowledgeBaseId());
|
|
|
|
|
|
|
|
|
|
// 把可以被dify直接识别的文件添加到知识库
|
|
|
|
|
uploadCaseFileToDifyKnowledgeBase(modelCase,allFileIds,documents);
|
|
|
|
|
|
|
|
|
|
// 把dify不能直接识别的文件ocr识别的内容添加到知识库
|
|
|
|
|
uploadCaseFileTextToDifyKnowledgeBase(modelCase,allFileIds,documents);
|
|
|
|
|
|
|
|
|
|
documents = this.queryDocuments(modelCase.getKnowledgeBaseId());
|
|
|
|
|
deleteAbsentCaseFileInDifyKnowledgeBase(modelCase,allFileIds,documents);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public void uploadCaseFileTextToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds,List<Document> documents) {
|
|
|
|
|
|
|
|
|
|
if (StrUtil.isEmpty(modelCase.getKnowledgeBaseId())){
|
|
|
|
|
log.warn("uploadCaseFileTextToDifyKnowledgeBase:案件:{}案件或者知识库为空,不进行知识库维护!", modelCase.getCaseName());
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Set<String> documentFileIds = documents.stream().map(Document::getFileId).collect(Collectors.toSet());
|
|
|
|
|
|
|
|
|
|
List<RecordFileDTO> recordFileDTOS = fileOcrProcessService.queryFileList(allFileIds);
|
|
|
|
|
// 上传除了 doc docx、txt、md、pdf 之外的文件
|
|
|
|
|
List<String> textFileIdList = recordFileDTOS.stream().filter(fileDTO -> {
|
|
|
|
|
boolean currentFileType = StrUtil.equalsAny(fileDTO.getFileType(), "doc", "docx", "txt", "md", "pdf");
|
|
|
|
|
if (!currentFileType) {
|
|
|
|
|
log.info("文件:{} 文件类型非doc、docx、txt、md、pdf,进行文档知识库维护...", fileDTO.getFileName());
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}).map(RecordFileDTO::getFileId).toList();
|
|
|
|
|
|
|
|
|
|
Map<String, RecordFileDTO> recordFileDTOMap = recordFileDTOS.stream().collect(Collectors.toMap(RecordFileDTO::getFileId, target -> target, (k1, k2) -> k1));
|
|
|
|
|
|
|
|
|
|
log.info("案件:{},共有:{}个笔录文件,符合上传要求的文件有:{}",modelCase.getCaseName(), allFileIds.size(), textFileIdList.size());
|
|
|
|
|
for (String fileId : textFileIdList) {
|
|
|
|
|
// 把新增的笔录数据添加到到知识库
|
|
|
|
|
if (!documentFileIds.contains(fileId)){
|
|
|
|
|
RecordFileDTO recordFileDTO = recordFileDTOMap.get(fileId);
|
|
|
|
|
String documentContent = StrUtil.isEmpty(recordFileDTO.getReviseText()) ? recordFileDTO.getOcrText() : recordFileDTO.getReviseText();
|
|
|
|
|
if (StrUtil.isEmpty(documentContent)){
|
|
|
|
|
log.warn("文件:{},ocr识别信息不存在,不进行文件知识库维护...", fileId);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
log.info("案件:{},笔录文件:{},添加到文档知识库...",modelCase.getCaseName(), recordFileDTOMap.get(fileId).getFileName());
|
|
|
|
|
this.createDocumentByText(modelCase.getKnowledgeBaseId(),fileId,recordFileDTO.getFileName(),documentContent);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
log.info("案件:{}上传笔录文本到知识库完成!",modelCase.getCaseName());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 删除案件中不存在的笔录文件
|
|
|
|
|
* @param modelCase 案件基本信息
|
|
|
|
|
* @param allFileIds 笔录文件id集合
|
|
|
|
|
* @param documents dify知识库文档列表
|
|
|
|
|
*/
|
|
|
|
|
public void deleteAbsentCaseFileInDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds,List<Document> documents){
|
|
|
|
|
|
|
|
|
|
if (CollUtil.isNotEmpty(recordFileIds)){
|
|
|
|
|
for (Document document : documents) {
|
|
|
|
|
String fileId = document.getFileId();
|
|
|
|
|
if (StrUtil.isNotEmpty(fileId) && !recordFileIds.contains(fileId)){
|
|
|
|
|
if (StrUtil.isNotEmpty(fileId) && !allFileIds.contains(fileId)){
|
|
|
|
|
// 删除不在笔录文件列表中的知识库
|
|
|
|
|
log.info("案件:{},笔录文件:{},从知识库中删除...",modelCase.getCaseName(), document.getName());
|
|
|
|
|
this.deleteDocument(modelCase.getKnowledgeBaseId(),document.getId());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
log.info("案件:{}上传笔录文件到知识库完成!",modelCase.getCaseName());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|