1. 证据改造代码提交

topo_dev
xueqingkun 6 months ago
parent edf9f23e87
commit e7fef968bc

@ -106,7 +106,9 @@ public class EvidenceDirectoryDTO {
public List<String> listAllFileId(){
List<String> fileIdList = new ArrayList<>();
fileIdList.add(this.id);
if (CollUtil.isNotEmpty(this.fileIdList)){
fileIdList.addAll(this.fileIdList);
}
if (CollUtil.isNotEmpty(this.child)){
for (EvidenceDirectoryDTO child : this.getChild()) {
fileIdList.addAll(child.listAllFileId());

@ -121,7 +121,7 @@ public interface CaseEvidenceService extends IService<CaseEvidence> {
String ocrAndExtract(String caseId, List<EvidenceDirectoryDTO> evidenceFileDTOS);
String updateCaseEvidence(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList);
String updateCaseEvidence4NewTransaction(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList);
/**
*
@ -141,4 +141,10 @@ public interface CaseEvidenceService extends IService<CaseEvidence> {
EvidenceDirectory createDirectory(EvidenceDirectoryReqVO evidenceDirectory);
/**
 * Submits the given evidence files for text recognition.
 * Note: Word and PDF files are covered as well as files that need OCR.
 *
 * @param evidenceFileList evidence files to submit for recognition
 */
void preSyncSubmitOCR(List<EvidenceFileDTO> evidenceFileList);
}

@ -6,6 +6,7 @@ import com.supervision.police.dto.OCRResDTO;
import com.supervision.police.dto.RecordFileDTO;
import java.util.List;
import java.util.function.Consumer;
/**
* @author Administrator
@ -18,6 +19,8 @@ public interface FileOcrProcessService extends IService<FileOcrProcess> {
List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList);
/**
 * Submits the given files for recognition, using the supplied consumer as the recognition task.
 *
 * @param fileIdList ids of the files to submit
 * @param consumer   recognition task; presumably receives the process records created for the files
 * @return process records for the submitted files
 */
List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer);
void doOCRTask(List<FileOcrProcess> fileOcrProcesses);
@ -49,4 +52,9 @@ public interface FileOcrProcessService extends IService<FileOcrProcess> {
List<RecordFileDTO> queryFileListWithIdSortNoTransaction(List<String> fileIdList);
/**
 * Runs the text-recognition task for Word documents.
 *
 * @param fileOcrProcesses process records of the Word files to recognize
 */
void doWordCRTask(List<FileOcrProcess> fileOcrProcesses);
/**
 * Runs the text-recognition task for PDF documents.
 *
 * @param fileOcrProcesses process records of the PDF files to recognize
 */
void doPdfCRTask(List<FileOcrProcess> fileOcrProcesses);
}

@ -33,6 +33,7 @@ import java.util.Map;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@ -433,21 +434,20 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
public String ocrAndExtract(String caseId, List<EvidenceDirectoryDTO> evidenceFileDTOS) {
List<CaseEvidenceDetailDTO> oldEvidences = this.queryEvidenceList(caseId);
List<EvidenceDirectoryDTO> evidenceDirectoryDTOS = listFileTree(caseId);
List<EvidenceDirectoryDTO> oldEvidenceDirectoryDTOS = listFileTree(caseId);
List<CaseEvidenceDetailDTO> newEvidences = toCaseCaseEvidenceDetailDTO(evidenceFileDTOS, evidenceDirectoryDTOS);
List<CaseEvidenceDetailDTO> newEvidences = toCaseCaseEvidenceDetailDTO(evidenceFileDTOS, oldEvidenceDirectoryDTOS);
newEvidences.forEach(caseEvidenceDetailDTO -> caseEvidenceDetailDTO.setCaseId(caseId));
List<CaseEvidenceDetailDTO> operationalEvidence = findChangedEvidence(oldEvidences, newEvidences);
List<CaseEvidenceDetailDTO> operationalEvidenceList = findChangedEvidence(oldEvidences, newEvidences);
String batchId = ((CaseEvidenceService)AopContext.currentProxy()).updateCaseEvidence(operationalEvidence);
String batchId = ((CaseEvidenceService)AopContext.currentProxy()).updateCaseEvidence4NewTransaction(operationalEvidenceList);
for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : operationalEvidence) {
if (StrUtil.equalsAny(caseEvidenceDetailDTO.getUpdateStatus(),"-1","0")){
// 只需要识别即可
fileOcrProcessService.syncSubmitOCR(List.of(caseEvidenceDetailDTO.getId()));
// todo:是否需要提取标题
}
List<EvidenceFileDTO> evidenceFileDTOList = operationalEvidenceList.stream()
.flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream().filter(file -> StrUtil.equals(file.getUpdateStatus(),"1"))).toList();
((CaseEvidenceService)AopContext.currentProxy()).preSyncSubmitOCR(evidenceFileDTOList);
for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : operationalEvidenceList) {
if (StrUtil.equals(caseEvidenceDetailDTO.getUpdateStatus(),"1")){
// 需要分析ocr识别+标题提取)
xxlJobService.executeTaskByJobHandler("evidenceAnalysis", caseEvidenceDetailDTO.getId());
@ -458,9 +458,14 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
}
/**
* note:
* @param caseEvidenceDetailDTOList
* @return
*/
@Override
@Transactional(transactionManager = "dataSourceTransactionManager",propagation = Propagation.REQUIRES_NEW, rollbackFor = Exception.class)
public String updateCaseEvidence(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList) {
public String updateCaseEvidence4NewTransaction(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList) {
String batchNo = DateTime.now().toString("yyyyMMddHHmmss");
for (CaseEvidenceDetailDTO evidence : caseEvidenceDetailDTOList) {
@ -471,7 +476,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
this.save(caseEvidence);
evidence.setId(caseEvidence.getId());
for (EvidenceFileDTO evidenceFileDTO : evidence.getFileList()) {
if (evidenceFileDTO.getUpdateStatus().equals("1")){
if (StrUtil.equalsAny(evidenceFileDTO.getUpdateStatus(),"1","2")){
// 新增
EvidenceFile evidenceFile = new EvidenceFile(caseEvidence.getId(), evidenceFileDTO.getFileId());
evidenceFile.setDirectoryId(evidence.getDirectoryId());
@ -490,6 +495,17 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
}*/
}
}else if (evidence.getUpdateStatus().equals("-1")){
for (EvidenceFileDTO evidenceFileDTO : evidence.getFileList()) {
if (StrUtil.equalsAny(evidenceFileDTO.getUpdateStatus(),"1","2")){
// 新增
EvidenceFile evidenceFile = new EvidenceFile(evidence.getId(), evidenceFileDTO.getFileId());
evidenceFile.setDirectoryId(evidence.getDirectoryId());
evidenceFile.setRank(evidenceFileDTO.getRank());
evidenceFile.setBatchNo(batchNo);
evidenceFileService.save(evidenceFile);
}
}
}/*else if (evidence.getUpdateStatus().equals("0")){
// 删除
this.removeById(evidence.getId());
@ -667,6 +683,21 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
return directory1;
}
/**
 * Submits each evidence file for text recognition, one submission per file.
 * The recognition task is picked by file type: {@code doc}/{@code docx} use the
 * Word task, {@code pdf} uses the PDF task, and every other type uses the OCR task.
 * Declared with {@code REQUIRES_NEW} propagation.
 *
 * @param evidenceFileList evidence files to submit
 */
@Override
@Transactional(rollbackFor = Exception.class, propagation= Propagation.REQUIRES_NEW, transactionManager = "dataSourceTransactionManager")
public void preSyncSubmitOCR(List<EvidenceFileDTO> evidenceFileList) {
    for (EvidenceFileDTO file : evidenceFileList) {
        // Word documents: text is read directly instead of going through OCR.
        if (StrUtil.equalsAny(file.getFileType(), "doc", "docx")) {
            fileOcrProcessService.syncSubmitOCR(List.of(file.getFileId()), fileOcrProcessService::doWordCRTask);
            continue;
        }
        // PDF documents have their own reader as well.
        if (StrUtil.equalsAny(file.getFileType(), "pdf")) {
            fileOcrProcessService.syncSubmitOCR(List.of(file.getFileId()), fileOcrProcessService::doPdfCRTask);
            continue;
        }
        // Everything else falls back to the regular OCR task.
        fileOcrProcessService.syncSubmitOCR(List.of(file.getFileId()), fileOcrProcessService::doOCRTask);
    }
}
private CaseEvidenceDetailDTO findEvidenceDetail(String evidenceId, List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOS) {
for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : caseEvidenceDetailDTOS) {
@ -677,8 +708,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
return null;
}
private List<CaseEvidenceDetailDTO> toCaseCaseEvidenceDetailDTO(List<EvidenceDirectoryDTO> newDirectoryDTOS,
List<EvidenceDirectoryDTO> evidenceDirectoryDTOS) {
private List<CaseEvidenceDetailDTO> toCaseCaseEvidenceDetailDTO(List<EvidenceDirectoryDTO> newDirectoryDTOS,List<EvidenceDirectoryDTO> oldEvidenceDirectoryDTOS) {
if (CollUtil.isEmpty(newDirectoryDTOS)){
return new ArrayList<>();
}
@ -692,7 +722,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
for (EvidenceDirectoryDTO evidenceFile : floatNewDirectoryDTOS) {
String directoryId = evidenceFile.getId();
EvidenceDirectoryDTO directory = evidenceDirectoryService.findDirectoryById(directoryId, evidenceDirectoryDTOS);
EvidenceDirectoryDTO directory = evidenceDirectoryService.findDirectoryById(directoryId, oldEvidenceDirectoryDTOS);
if (null == directory){
log.warn("toCaseCaseEvidenceDetailDTO:目录id{}不存在对应的目录分类信息",directoryId);
continue;
@ -701,20 +731,28 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
if (directory.getLevel() == 1 || directory.getLevel() == 2){
for (String fileId : evidenceFile.getFileIdList()) {
CaseEvidenceDetailDTO caseEvidenceDetailDTO = new CaseEvidenceDetailDTO();
if (null != fileMap.get(fileId)){
// 证据名为文件名
caseEvidenceDetailDTO.setEvidenceName(fileMap.get(fileId).getFilename());
}
caseEvidenceDetailDTO.setEvidenceType(directory.getCategoryId());
caseEvidenceDetailDTO.setDirectoryId(directoryId);
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(fileId);
MinioFile minioFile = fileMap.get(fileId);
if (null != minioFile){
// 证据名为文件名
caseEvidenceDetailDTO.setEvidenceName(minioFile.getFilename());
evidenceFileDTO.setFileType(minioFile.getFileType());
evidenceFileDTO.setFileName(minioFile.getFilename());
}
caseEvidenceDetailDTO.setFileList(List.of(evidenceFileDTO));
caseEvidenceDetailDTOS.add(caseEvidenceDetailDTO);
}
}
if (directory.getLevel() == 3){
CaseEvidenceDetailDTO caseEvidenceDetailDTO = new CaseEvidenceDetailDTO();
if (CollUtil.isNotEmpty(directory.getFileInfoList())){
String evidenceId = CollUtil.getFirst(directory.getFileInfoList()).getEvidenceId();
caseEvidenceDetailDTO.setId(evidenceId);
}
// 证据名为目录名
caseEvidenceDetailDTO.setEvidenceName(evidenceFile.getDirectoryName());
caseEvidenceDetailDTO.setEvidenceType(directory.getCategoryId());
@ -723,6 +761,12 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
for (String fileId : evidenceFile.getFileIdList()) {
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(fileId);
MinioFile minioFile = fileMap.get(fileId);
if (null != minioFile){
evidenceFileDTO.setFileName(minioFile.getFilename());
evidenceFileDTO.setFileType(minioFile.getFileType());
}
evidenceFileDTOS.add(evidenceFileDTO);
}
caseEvidenceDetailDTO.setFileList(evidenceFileDTOS);
@ -772,6 +816,11 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
if (CollUtil.isNotEmpty(oldEvidenceList) && CollUtil.isNotEmpty(newEvidenceFileList)){
// 数据库中和新数据都存在
Map<String, EvidenceFileDTO> fileCache = Stream.of(newEvidenceFileList, oldEvidenceList)
.flatMap(Collection::stream)
.flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream())
.collect(Collectors.toMap(EvidenceFileDTO::getFileId, Function.identity()));
for (CaseEvidenceDetailDTO oldEvidence : oldEvidenceList) {
boolean isFind = false;
for (CaseEvidenceDetailDTO newEvidence : newEvidenceFileList) {
@ -780,7 +829,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
oldEvidence.setUpdateStatus("-1");
List<String> oldFileIds = oldEvidence.getFileList().stream().map(EvidenceFileDTO::getFileId).toList();
List<String> newFileIds = newEvidence.getFileList().stream().map(EvidenceFileDTO::getFileId).toList();
TupleIdRecord tupleIdRecord = compareFileList(oldFileIds, newFileIds);
TupleIdRecord tupleIdRecord = compareFileList(newFileIds,oldFileIds);
List<EvidenceFileDTO> updateFileList = new ArrayList<>();
// 新增的文件顺序排在原有文件的后面
@ -789,6 +838,10 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(addFileId);
evidenceFileDTO.setUpdateStatus("1");
if (null != fileCache.get(addFileId)){
evidenceFileDTO.setFileType(fileCache.get(addFileId).getFileType());
}
evidenceFileDTO.setRank(initOrder + findRank(newFileIds, addFileId));
updateFileList.add(evidenceFileDTO);
}
@ -796,6 +849,9 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(deleteFileId);
evidenceFileDTO.setUpdateStatus("0");
if (null != fileCache.get(deleteFileId)){
evidenceFileDTO.setFileType(fileCache.get(deleteFileId).getFileType());
}
evidenceFileDTO.setRank(findRank(oldFileIds, deleteFileId));
updateFileList.add(evidenceFileDTO);
}
@ -804,6 +860,9 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(updateFileId);
evidenceFileDTO.setUpdateStatus("2");
if (null != fileCache.get(updateFileId)){
evidenceFileDTO.setFileType(fileCache.get(updateFileId).getFileType());
}
evidenceFileDTO.setRank(findRank(oldFileIds, updateFileId));
updateFileList.add(evidenceFileDTO);
}

@ -1,6 +1,7 @@
package com.supervision.police.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
@ -17,6 +18,8 @@ import com.supervision.police.service.FileOcrProcessService;
import com.supervision.police.mapper.FileOcrProcessMapper;
import com.supervision.police.service.NoteRecordService;
import com.supervision.police.service.OCRService;
import com.supervision.utils.PDFReadUtil;
import com.supervision.utils.WordReadUtil;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.aop.framework.AopContext;
@ -28,6 +31,7 @@ import org.springframework.transaction.annotation.Transactional;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
@ -61,6 +65,11 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
return submitOCR(fileIdList, this::doOCRTask);
}
/**
 * Submits the given files for recognition with a caller-supplied recognition task.
 * Delegates directly to {@code submitOCR}.
 *
 * @param fileIdList ids of the files to submit
 * @param consumer   recognition task handed to {@code submitOCR}
 * @return whatever {@code submitOCR} returns for these files
 */
@Override
public List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer) {
    List<FileOcrProcess> submitted = this.submitOCR(fileIdList, consumer);
    return submitted;
}
private List<FileOcrProcess> submitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer){
if (CollUtil.isEmpty(fileIdList)){
@ -91,7 +100,8 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
log.debug("submitOCR:提交识别任务到异步处理器中...");
consumer.accept(processList);
return super.lambdaQuery().in(FileOcrProcess::getFileId, fileIdList).list();
List<FileOcrProcess> resultList = super.lambdaQuery().in(FileOcrProcess::getFileId, fileIdList).list();
return sortByIdOrder(fileIdList,resultList, FileOcrProcess::getFileId);
}
@Override
@ -186,7 +196,7 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
@Override
public List<RecordFileDTO> queryFileListWithIdSort(List<String> fileIdList) {
List<RecordFileDTO> recordFileDTOS = this.queryFileList(fileIdList);
return sortByIdOrder(fileIdList, recordFileDTOS);
return sortByIdOrder(fileIdList, recordFileDTOS, RecordFileDTO::getFileId);
}
@Override
@ -215,6 +225,44 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
return this.queryFileListWithIdSort(fileIdList);
}
/**
 * Recognizes Word files by reading their text from MinIO through {@code WordReadUtil}.
 *
 * @param fileOcrProcesses process records of the Word files to recognize
 */
@Override
public void doWordCRTask(List<FileOcrProcess> fileOcrProcesses) {
    Function<String, String> wordReader = id -> WordReadUtil.readWordInMinio(minioService, id);
    doMcr(fileOcrProcesses, wordReader);
}
/**
 * Recognizes PDF files by reading their text from MinIO through {@code PDFReadUtil}.
 *
 * @param fileOcrProcesses process records of the PDF files to recognize
 */
@Override
public void doPdfCRTask(List<FileOcrProcess> fileOcrProcesses) {
    Function<String, String> pdfReader = id -> PDFReadUtil.readPdfInMinio(minioService, id);
    doMcr(fileOcrProcesses, pdfReader);
}
/**
 * Reads the text of every given process record with the supplied reader and
 * stores it on the record. Records are handled one by one; a failure on one
 * record marks it as FAIL and does not stop the remaining records.
 *
 * @param fileOcrProcesses process records to recognize; nothing is done when empty
 * @param function         maps a file id to the recognized text of that file
 */
private void doMcr(List<FileOcrProcess> fileOcrProcesses, Function<String, String> function) {
    log.info("doMcr:开始识别文件...{}",JSONUtil.toJsonStr(fileOcrProcesses));
    if (CollUtil.isEmpty(fileOcrProcesses)){
        log.info("doMcr:当前暂无识别的任务,结束...");
        return;
    }
    for (FileOcrProcess current : fileOcrProcesses) {
        recognizeSingle(current, function);
    }
}

/**
 * Recognizes one process record: marks it PROCESSING, applies the reader, and
 * persists the text, or marks the record FAIL when anything goes wrong.
 */
private void recognizeSingle(FileOcrProcess current, Function<String, String> reader) {
    log.info("ocr:开始识别文件:{}", JSONUtil.toJsonStr(current));
    this.updateOCrStatus(List.of(current.getFileId()), OcrProcessStatus.PROCESSING.getCode());
    try {
        String recognizedText = reader.apply(current.getFileId());
        // A null result is treated as a failure and handled by the catch below.
        Assert.notNull(recognizedText, "识别结果为空");
        // NOTE(review): the status written together with the text is still PROCESSING;
        // confirm whether a completed/success status was intended here.
        this.lambdaUpdate()
                .eq(FileOcrProcess::getFileId, current.getFileId())
                .set(FileOcrProcess::getStatus, OcrProcessStatus.PROCESSING.getCode())
                .set(FileOcrProcess::getOcrText, recognizedText)
                .update();
    } catch (Exception e) {
        log.error("doMcr识别失败",e);
        this.updateOCrStatus(List.of(current.getFileId()), OcrProcessStatus.FAIL.getCode());
    }
}
private List<OCRReqDTO> buildOCRReqDTO(List<String> fileIdList){
List<MinioFile> minioFiles = minioService.listMinioFile(fileIdList);
@ -225,15 +273,15 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
}
private List<RecordFileDTO> sortByIdOrder(List<String> fileIdList,List<RecordFileDTO> recordFileDTOList){
private <T> List<T> sortByIdOrder(List<String> idList,List<T> targetList, Function<T,String> function){
if (CollUtil.size(fileIdList) < 2 || CollUtil.size(recordFileDTOList) < 2) {
return recordFileDTOList;
if (CollUtil.size(idList) < 2 || CollUtil.size(targetList) < 2) {
return targetList;
}
Map<String, RecordFileDTO> recordFileDTOMap = recordFileDTOList.stream()
.collect(Collectors.toMap(RecordFileDTO::getFileId,recordFileDTO -> recordFileDTO, (k1, k2) -> k1));
return fileIdList.stream().map(recordFileDTOMap::get).filter(Objects::nonNull).collect(Collectors.toList());
Map<String, T> targetMap = targetList.stream()
.collect(Collectors.toMap(function,target -> target, (k1, k2) -> k1));
return idList.stream().map(targetMap::get).filter(Objects::nonNull).collect(Collectors.toList());
}
}

@ -73,7 +73,7 @@ public class LLMExtractServiceImpl implements LLMExtractService {
//判断content是否为JSON格式如果是则尝试转换为JSON格式并获取title属性的值
try {
JSONObject jsonObject = new JSONObject(content);
String title = jsonObject.getString("title");
String title = jsonObject.optString("title");
ocrExtractDto.setTitle(title);
result.add(ocrExtractDto);
} catch (Exception e) {
@ -155,7 +155,7 @@ public class LLMExtractServiceImpl implements LLMExtractService {
log.info("属性提取结果: {}", jsonObject);
List<NotePromptExtractAttributesDto> extractAttributesList = LLMExtractDto.getExtractAttributes();
extractAttributesMap.forEach((key, value) -> {
String attrValue = jsonObject.getString(key);
String attrValue = jsonObject.optString(key);
log.info("属性提取结果: 【{}】。Key【{}】", attrValue, key);
NotePromptExtractAttributesDto extractAttributesDto = new NotePromptExtractAttributesDto();
extractAttributesDto.setAttrName(key);

@ -1,5 +1,6 @@
package com.supervision.utils;
import com.supervision.minio.service.MinioService;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
@ -20,4 +21,14 @@ public class PDFReadUtil {
}
return text;
}
/**
 * Opens the object with the given id from MinIO and extracts its text with {@code pdf2text}.
 *
 * @param minioService service used to open the object's input stream
 * @param fileId       id of the file to read
 * @return the extracted text, or {@code null} when opening, reading or closing the
 *         stream fails (the error is logged)
 */
public static String readPdfInMinio(MinioService minioService, String fileId) {
    try (InputStream pdfStream = minioService.getObjectInputStream(fileId)) {
        String extracted = pdf2text(pdfStream);
        return extracted;
    } catch (Exception e) {
        log.error("从minio中获取文件失败", e);
    }
    // Reached only after a failure was logged above.
    return null;
}
}

@ -32,7 +32,7 @@
c.category_name as categoryName,
c.icon_url as iconURL,
c.parent_id as parent_id,
np.name as name
np.name as promptName
from evidence_category c
left join note_prompt np on c.prompt_id = np.id
<where>

Loading…
Cancel
Save