1. 案件文件同步dify知识库-支持图片类型

jinan_dev
xueqingkun 5 months ago
parent 68dd52ff81
commit c5a4178d29

@ -13,6 +13,7 @@ public class DifyConstants {
public static final String METHOD_DOCUMENT = "/document";
public static final String METHOD_DOCUMENTS = "/documents";
public static final String METHOD_CREATE_BY_FILE = "/create-by-file";
public static final String METHOD_CREATE_BY_TEXT = "/create_by_text";
public static final String METHOD_CHAT_MESSAGES = "/chat-messages";
public static final String DATASET_INDEXING_TECHNIQUE_HIGH_QUALITY = "high_quality";

@ -27,6 +27,9 @@ public class RecordFileDTO {
@Schema(description = "文件大小")
private String fileSize;
@Schema(description = "文件类型")
private String fileType;
@Schema(description = "文件识别状态 -1:未识别 0正在识别 1识别成功 2识别失败")
private String ocrStatus;

@ -76,8 +76,8 @@ public class ModelCaseServiceImpl extends ServiceImpl<ModelCaseMapper, ModelCase
private CaseEvidenceService caseEvidenceService;
private final EvidenceDirectoryService directoryService;
private final DifyApiUtil difyApiUtil;
@Autowired
private DifyApiUtil difyApiUtil;
/**
*
@ -402,8 +402,8 @@ public class ModelCaseServiceImpl extends ServiceImpl<ModelCaseMapper, ModelCase
log.info("案件:{} 尚未完成案件分析,不进行笔录上传...",modelCase.getCaseName());
return;
}
log.info("案件:{} 开始上传笔录到支持库...",modelCase.getCaseName());
difyApiUtil.uploadCaseFileToDifyKnowledgeBase(modelCase,this.listCaseFileIds(caseId));
log.info("案件:{} 开始上传笔录到知识库...",modelCase.getCaseName());
difyApiUtil.syncCaseFileToDifyKnowledgeBase(modelCase,this.listCaseFileIds(caseId));
log.info("案件:{} 上传笔录到知识库成功...",modelCase.getCaseName());
}

@ -176,7 +176,7 @@ public class ModelServiceImpl implements ModelService {
calculateFinalScore(analyseCaseDTO, modelCase, atomicResultMap);
caseStatusManageService.whenAnalyseCaseSuccess(analyseCaseDTO.getCaseId(), modelCase.getTotalScore());
// 计算完成之后,把所有的笔录上传到模型
difyApiUtil.uploadCaseFileToDifyKnowledgeBase(modelCase, modelCaseService.listCaseFileIds(analyseCaseDTO.getCaseId()));
difyApiUtil.syncCaseFileToDifyKnowledgeBase(modelCase, modelCaseService.listCaseFileIds(analyseCaseDTO.getCaseId()));
return R.ok();
}
@ -328,7 +328,7 @@ public class ModelServiceImpl implements ModelService {
modelCase.setTotalScore(max);
log.info("更新案件得分情况。最终得分:{}分(共性+入罪/共性+出罪 取最大值)。入罪:{}分。出罪:{}分。共性:{}分。", max, rz, cz, gx);
caseStatusManageService.whenAnalyseCaseSuccess(analyseCaseDTO.getCaseId(), modelCase.getTotalScore());
difyApiUtil.uploadCaseFileToDifyKnowledgeBase(modelCase, modelCaseService.listCaseFileIds(caseId));
difyApiUtil.syncCaseFileToDifyKnowledgeBase(modelCase, modelCaseService.listCaseFileIds(caseId));
return R.ok();
}

@ -17,15 +17,12 @@ import com.supervision.police.dto.NoteRecordDTO;
import com.supervision.police.mapper.NoteRecordMapper;
import com.supervision.police.service.ModelCaseService;
import com.supervision.police.service.NoteRecordService;
import com.supervision.utils.DifyApiUtil;
import com.supervision.utils.Document;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.io.InputStream;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
@Service
@ -39,8 +36,6 @@ public class NoteRecordServiceImpl extends ServiceImpl<NoteRecordMapper, NoteRec
private final MinioService minioService;
private final DifyApiUtil difyApiUtil;
@Override
public List<NoteRecordDTO> selectNoteRecordDTOList(NoteRecord noteRecord) {
return super.getBaseMapper().selectNoteRecordDTOList(noteRecord);

@ -1,16 +1,20 @@
package com.supervision.utils;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.supervision.minio.domain.MinioFile;
import com.supervision.minio.service.MinioService;
import com.supervision.police.domain.ModelCase;
import com.supervision.police.dto.RecordFileDTO;
import com.supervision.police.service.FileOcrProcessService;
import com.supervision.police.vo.dify.DatasetReqVO;
import com.supervision.police.vo.dify.DatasetResVO;
import com.supervision.police.vo.dify.DifyChatReqVO;
@ -50,6 +54,9 @@ public class DifyApiUtil {
@Autowired
private MinioService minioService;
@Autowired
private FileOcrProcessService fileOcrProcessService;
public DifyChatResVO chat(DifyChatReqVO difyChatReqVO) {
DifyChatResVO difyChatResVO = null;
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
@ -200,6 +207,45 @@ public class DifyApiUtil {
}
return id;
}
/**
 * Creates a text document in a Dify dataset, optionally deriving the document
 * name from the file id.
 *
 * <p>When {@code fileId} is non-empty the document name is produced by
 * {@code generateDocumentName(fileName, fileId)}; otherwise {@code fileName}
 * is used unchanged. The actual creation is delegated to the three-argument
 * overload.
 *
 * @param datasetId       id of the target dataset
 * @param fileId          id of the source file; may be empty
 * @param fileName        name of the source file; must not be empty
 * @param documentContent text content of the document
 * @return the value returned by the three-argument overload (the created document id)
 */
public String createDocumentByText(String datasetId,String fileId,String fileName,String documentContent){
    Assert.notEmpty(fileName, "文件名称不能为空");
    // Only decorate the name when there is a file id to embed in it.
    String documentName = StrUtil.isEmpty(fileId)
            ? fileName
            : generateDocumentName(fileName, fileId);
    return createDocumentByText(datasetId, documentName, documentContent);
}
/**
 * Creates a document in a Dify dataset from raw text by POSTing a JSON body
 * to the dataset's create-by-text endpoint.
 *
 * @param datasetId       id of the target dataset (used to build the request URL)
 * @param documentName    name given to the new document
 * @param documentContent text content of the new document
 * @return the {@code id} of the {@code document} object in the response
 * @throws RuntimeException if the response status is not OK, or the response
 *                          contains no {@code document} object or no document {@code id}
 */
public String createDocumentByText(String datasetId,String documentName,String documentContent) {
    // Endpoint for creating a document from text
    String url = difyUrl + METHOD_DATASET + "/" + datasetId + METHOD_DOCUMENT + METHOD_CREATE_BY_TEXT;
    cn.hutool.http.HttpRequest request = HttpUtil.createPost(url);
    request.header(HttpHeaders.AUTHORIZATION, difyDatasetAuth);

    Map<String, Object> body = new HashMap<>();
    body.put("indexing_technique","high_quality");// indexing technique: high quality
    body.put("process_rule",Map.of("mode","automatic"));// processing rule: automatic
    body.put("name",documentName);// document name
    body.put("text",documentContent);// document content
    request.body(JSONUtil.toJsonStr(body));

    try (cn.hutool.http.HttpResponse response = request.execute()) {
        // Read the payload once so every failure path logs the same content.
        String responseBody = response.body();
        if (!response.isOk()) {
            log.error("创建知识库文档失败!状态码:{},dify服务端返回内容:{}", response.getStatus(),responseBody);
            throw new RuntimeException("创建知识库文档失败!");
        }
        JSONObject document = JSONUtil.parseObj(responseBody).getJSONObject("document");
        // A document without an id is as unusable as no document at all; report it
        // explicitly instead of failing with a bare NullPointerException.
        Object documentId = document == null ? null : document.get("id");
        if (documentId == null){
            log.error("创建知识库文档失败!dify服务端返回内容:{}",responseBody);
            throw new RuntimeException("创建知识库文档失败!");
        }
        log.info("创建知识库文档成功!ID:{}", documentId);
        return documentId.toString();
    }
}
/**
*
@ -257,28 +303,26 @@ public class DifyApiUtil {
return documents;
}
public void uploadCaseFileToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds) {
public void uploadCaseFileToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds,List<Document> documents) {
if (StrUtil.isEmpty(modelCase.getKnowledgeBaseId())){
log.warn("uploadRecordFileToKnowledgeBase:案件:{}案件或者知识库为空,不进行知识库维护!", modelCase.getCaseName());
return;
}
List<Document> documents = this.queryDocuments(modelCase.getKnowledgeBaseId());
Set<String> documentFileIds = documents.stream().map(Document::getFileId).collect(Collectors.toSet());
// 只上传 doc docx、txt、md、pdf 文件且文件大小不能超过15mb
List<MinioFile> minioFiles = minioService.listMinioFile(allFileIds);
List<String> recordFileIds = minioFiles.stream().filter(minioFile -> {
boolean currentFileSize = minioFile.getSize() < 15 * 1024 * 1024;
if (!currentFileSize) {
log.warn("文件大小超过15mb不进行知识库维护:{}", minioFile.getFilename());
log.warn("文件大小超过15mb不进行文件知识库维护:{}", minioFile.getFilename());
return false;
}
boolean currentFileType = StrUtil.equalsAny(minioFile.getFileType(), "doc", "docx", "txt", "md", "pdf");
if (!currentFileType) {
log.warn("文件:{} 文件类型非doc、docx、txt、md、pdf不进行知识库维护...", minioFile.getFilename());
log.warn("文件:{} 文件类型非doc、docx、txt、md、pdf不进行文件知识库维护...", minioFile.getFilename());
return false;
}
return true;
@ -295,18 +339,86 @@ public class DifyApiUtil {
}
}
log.info("案件:{}上传笔录文件到知识库完成!",modelCase.getCaseName());
}
/**
 * Synchronises a case's files with the case's Dify knowledge base.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>query the documents currently in the knowledge base;</li>
 *   <li>upload the files Dify can ingest directly;</li>
 *   <li>upload the OCR text of the files Dify cannot ingest directly;</li>
 *   <li>query the documents again and delete those whose file id is no longer
 *       in {@code allFileIds}.</li>
 * </ol>
 *
 * <p>If the case has no knowledge base id nothing is queried, uploaded or deleted.
 *
 * @param modelCase  the case whose knowledge base is maintained
 * @param allFileIds ids of all files that currently belong to the case
 */
public void syncCaseFileToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds){
    // Guard before the first remote call: without a knowledge base id there is
    // nothing to query, and the upload helpers would skip their work anyway.
    if (StrUtil.isEmpty(modelCase.getKnowledgeBaseId())){
        log.warn("syncCaseFileToDifyKnowledgeBase:案件:{}案件或者知识库为空,不进行知识库维护!", modelCase.getCaseName());
        return;
    }
    List<Document> documents = this.queryDocuments(modelCase.getKnowledgeBaseId());
    // Add the files Dify can ingest directly to the knowledge base
    uploadCaseFileToDifyKnowledgeBase(modelCase,allFileIds,documents);
    // Add the OCR-recognised content of files Dify cannot ingest directly
    uploadCaseFileTextToDifyKnowledgeBase(modelCase,allFileIds,documents);
    // Re-query so the deletion pass sees the documents created above
    documents = this.queryDocuments(modelCase.getKnowledgeBaseId());
    deleteAbsentCaseFileInDifyKnowledgeBase(modelCase,allFileIds,documents);
}
public void uploadCaseFileTextToDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds,List<Document> documents) {
if (StrUtil.isEmpty(modelCase.getKnowledgeBaseId())){
log.warn("uploadCaseFileTextToDifyKnowledgeBase:案件:{}案件或者知识库为空,不进行知识库维护!", modelCase.getCaseName());
return;
}
Set<String> documentFileIds = documents.stream().map(Document::getFileId).collect(Collectors.toSet());
if (CollUtil.isNotEmpty(recordFileIds)){
for (Document document : documents) {
String fileId = document.getFileId();
if (StrUtil.isNotEmpty(fileId) && !recordFileIds.contains(fileId)){
// 删除不在笔录文件列表中的知识库
log.info("案件:{},笔录文件:{},从知识库中删除...",modelCase.getCaseName(), document.getName());
this.deleteDocument(modelCase.getKnowledgeBaseId(),document.getId());
List<RecordFileDTO> recordFileDTOS = fileOcrProcessService.queryFileList(allFileIds);
// 上传除了 doc docx、txt、md、pdf 之外的文件
List<String> textFileIdList = recordFileDTOS.stream().filter(fileDTO -> {
boolean currentFileType = StrUtil.equalsAny(fileDTO.getFileType(), "doc", "docx", "txt", "md", "pdf");
if (!currentFileType) {
log.info("文件:{} 文件类型非doc、docx、txt、md、pdf进行文档知识库维护...", fileDTO.getFileName());
return true;
}
return false;
}).map(RecordFileDTO::getFileId).toList();
Map<String, RecordFileDTO> recordFileDTOMap = recordFileDTOS.stream().collect(Collectors.toMap(RecordFileDTO::getFileId, target -> target, (k1, k2) -> k1));
log.info("案件:{},共有:{}个笔录文件,符合上传要求的文件有:{}",modelCase.getCaseName(), allFileIds.size(), textFileIdList.size());
for (String fileId : textFileIdList) {
// 把新增的笔录数据添加到到知识库
if (!documentFileIds.contains(fileId)){
RecordFileDTO recordFileDTO = recordFileDTOMap.get(fileId);
String documentContent = StrUtil.isEmpty(recordFileDTO.getReviseText()) ? recordFileDTO.getOcrText() : recordFileDTO.getReviseText();
if (StrUtil.isEmpty(documentContent)){
log.warn("文件:{}ocr识别信息不存在不进行文件知识库维护...", fileId);
continue;
}
log.info("案件:{},笔录文件:{},添加到文档知识库...",modelCase.getCaseName(), recordFileDTOMap.get(fileId).getFileName());
this.createDocumentByText(modelCase.getKnowledgeBaseId(),fileId,recordFileDTO.getFileName(),documentContent);
}
}
log.info("案件:{}上传笔录文本到知识库完成!",modelCase.getCaseName());
}
/**
 * Deletes from the case's Dify knowledge base every document whose file id is
 * not contained in {@code allFileIds}.
 *
 * <p>Documents with an empty file id are left untouched. If the case has no
 * knowledge base id, nothing is deleted.
 *
 * @param modelCase  the case whose knowledge base is cleaned up
 * @param allFileIds ids of all files that currently belong to the case
 * @param documents  documents currently present in the Dify knowledge base
 */
public void deleteAbsentCaseFileInDifyKnowledgeBase(ModelCase modelCase,List<String> allFileIds,List<Document> documents){
    // Same precondition as the upload helpers: no knowledge base, no maintenance.
    if (StrUtil.isEmpty(modelCase.getKnowledgeBaseId())){
        log.warn("deleteAbsentCaseFileInDifyKnowledgeBase:案件:{}案件或者知识库为空,不进行知识库维护!", modelCase.getCaseName());
        return;
    }
    for (Document document : documents) {
        String fileId = document.getFileId();
        if (StrUtil.isNotEmpty(fileId) && !allFileIds.contains(fileId)){
            // Remove knowledge-base documents that are no longer in the case file list
            log.info("案件:{},笔录文件:{},从知识库中删除...",modelCase.getCaseName(), document.getName());
            this.deleteDocument(modelCase.getKnowledgeBaseId(),document.getId());
        }
    }
    // The original message reported an upload; this method only deletes.
    log.info("案件:{}清理知识库中已移除的笔录文件完成!",modelCase.getCaseName());
}

@ -28,6 +28,7 @@
mf.id as fileId,
mf.filename as fileName,
mf.size as fileSize,
mf.file_type as fileType,
op.id as ocrId,
op.status as ocrStatus,
op.ocr_text as ocrText,

@ -55,4 +55,10 @@ public class DifyTest {
System.out.println(documents.size());
}
@Test
public void createDocumentByTextTest(){
    // Manual integration check: creates a text document in a fixed dataset
    // and prints the returned document id.
    final String datasetId = "d5da0a24-a8e9-4ceb-8004-6da4847fbb04";
    System.out.println(difyApiUtil.createDocumentByText(datasetId, "test-text", "坚定不移中国特色社会主义道路"));
}
}

@ -419,7 +419,7 @@ public class FuHsiApplicationTests {
ModelCaseService modelCaseService;
@Test
public void initCaseKnowledgeBaseTest(){
modelCaseService.initCaseKnowledgeBase("1823935118734643202");
modelCaseService.initCaseKnowledgeBase("1825347949617033217");
}
@Test

Loading…
Cancel
Save