1. 证据改造代码提交

topo_dev
xueqingkun 6 months ago
parent edf9f23e87
commit e7fef968bc

@ -106,7 +106,9 @@ public class EvidenceDirectoryDTO {
public List<String> listAllFileId(){ public List<String> listAllFileId(){
List<String> fileIdList = new ArrayList<>(); List<String> fileIdList = new ArrayList<>();
fileIdList.add(this.id); if (CollUtil.isNotEmpty(this.fileIdList)){
fileIdList.addAll(this.fileIdList);
}
if (CollUtil.isNotEmpty(this.child)){ if (CollUtil.isNotEmpty(this.child)){
for (EvidenceDirectoryDTO child : this.getChild()) { for (EvidenceDirectoryDTO child : this.getChild()) {
fileIdList.addAll(child.listAllFileId()); fileIdList.addAll(child.listAllFileId());

@ -121,7 +121,7 @@ public interface CaseEvidenceService extends IService<CaseEvidence> {
String ocrAndExtract(String caseId, List<EvidenceDirectoryDTO> evidenceFileDTOS); String ocrAndExtract(String caseId, List<EvidenceDirectoryDTO> evidenceFileDTOS);
String updateCaseEvidence(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList); String updateCaseEvidence4NewTransaction(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList);
/** /**
* *
@ -141,4 +141,10 @@ public interface CaseEvidenceService extends IService<CaseEvidence> {
EvidenceDirectory createDirectory(EvidenceDirectoryReqVO evidenceDirectory); EvidenceDirectory createDirectory(EvidenceDirectoryReqVO evidenceDirectory);
/**
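* Pre-submits evidence files for text recognition. Note: Word (doc/docx) and PDF files are read by dedicated text readers, all other file types go through OCR.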
* @param evidenceFileList
*/
void preSyncSubmitOCR(List<EvidenceFileDTO> evidenceFileList);
} }

@ -6,6 +6,7 @@ import com.supervision.police.dto.OCRResDTO;
import com.supervision.police.dto.RecordFileDTO; import com.supervision.police.dto.RecordFileDTO;
import java.util.List; import java.util.List;
import java.util.function.Consumer;
/** /**
* @author Administrator * @author Administrator
@ -18,6 +19,8 @@ public interface FileOcrProcessService extends IService<FileOcrProcess> {
List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList); List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList);
List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer);
void doOCRTask(List<FileOcrProcess> fileOcrProcesses); void doOCRTask(List<FileOcrProcess> fileOcrProcesses);
@ -49,4 +52,9 @@ public interface FileOcrProcessService extends IService<FileOcrProcess> {
List<RecordFileDTO> queryFileListWithIdSortNoTransaction(List<String> fileIdList); List<RecordFileDTO> queryFileListWithIdSortNoTransaction(List<String> fileIdList);
void doWordCRTask(List<FileOcrProcess> fileOcrProcesses);
void doPdfCRTask(List<FileOcrProcess> fileOcrProcesses);
} }

@ -33,6 +33,7 @@ import java.util.Map;
import java.util.*; import java.util.*;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j @Slf4j
@Service @Service
@ -433,21 +434,20 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
public String ocrAndExtract(String caseId, List<EvidenceDirectoryDTO> evidenceFileDTOS) { public String ocrAndExtract(String caseId, List<EvidenceDirectoryDTO> evidenceFileDTOS) {
List<CaseEvidenceDetailDTO> oldEvidences = this.queryEvidenceList(caseId); List<CaseEvidenceDetailDTO> oldEvidences = this.queryEvidenceList(caseId);
List<EvidenceDirectoryDTO> evidenceDirectoryDTOS = listFileTree(caseId); List<EvidenceDirectoryDTO> oldEvidenceDirectoryDTOS = listFileTree(caseId);
List<CaseEvidenceDetailDTO> newEvidences = toCaseCaseEvidenceDetailDTO(evidenceFileDTOS, evidenceDirectoryDTOS); List<CaseEvidenceDetailDTO> newEvidences = toCaseCaseEvidenceDetailDTO(evidenceFileDTOS, oldEvidenceDirectoryDTOS);
newEvidences.forEach(caseEvidenceDetailDTO -> caseEvidenceDetailDTO.setCaseId(caseId)); newEvidences.forEach(caseEvidenceDetailDTO -> caseEvidenceDetailDTO.setCaseId(caseId));
List<CaseEvidenceDetailDTO> operationalEvidence = findChangedEvidence(oldEvidences, newEvidences); List<CaseEvidenceDetailDTO> operationalEvidenceList = findChangedEvidence(oldEvidences, newEvidences);
String batchId = ((CaseEvidenceService)AopContext.currentProxy()).updateCaseEvidence(operationalEvidence); String batchId = ((CaseEvidenceService)AopContext.currentProxy()).updateCaseEvidence4NewTransaction(operationalEvidenceList);
for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : operationalEvidence) { List<EvidenceFileDTO> evidenceFileDTOList = operationalEvidenceList.stream()
if (StrUtil.equalsAny(caseEvidenceDetailDTO.getUpdateStatus(),"-1","0")){ .flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream().filter(file -> StrUtil.equals(file.getUpdateStatus(),"1"))).toList();
// 只需要识别即可 ((CaseEvidenceService)AopContext.currentProxy()).preSyncSubmitOCR(evidenceFileDTOList);
fileOcrProcessService.syncSubmitOCR(List.of(caseEvidenceDetailDTO.getId()));
// todo:是否需要提取标题 for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : operationalEvidenceList) {
}
if (StrUtil.equals(caseEvidenceDetailDTO.getUpdateStatus(),"1")){ if (StrUtil.equals(caseEvidenceDetailDTO.getUpdateStatus(),"1")){
// 需要分析ocr识别+标题提取) // 需要分析ocr识别+标题提取)
xxlJobService.executeTaskByJobHandler("evidenceAnalysis", caseEvidenceDetailDTO.getId()); xxlJobService.executeTaskByJobHandler("evidenceAnalysis", caseEvidenceDetailDTO.getId());
@ -458,9 +458,14 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
} }
/**
* note:
* @param caseEvidenceDetailDTOList
* @return
*/
@Override @Override
@Transactional(transactionManager = "dataSourceTransactionManager",propagation = Propagation.REQUIRES_NEW, rollbackFor = Exception.class) @Transactional(transactionManager = "dataSourceTransactionManager",propagation = Propagation.REQUIRES_NEW, rollbackFor = Exception.class)
public String updateCaseEvidence(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList) { public String updateCaseEvidence4NewTransaction(List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOList) {
String batchNo = DateTime.now().toString("yyyyMMddHHmmss"); String batchNo = DateTime.now().toString("yyyyMMddHHmmss");
for (CaseEvidenceDetailDTO evidence : caseEvidenceDetailDTOList) { for (CaseEvidenceDetailDTO evidence : caseEvidenceDetailDTOList) {
@ -471,7 +476,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
this.save(caseEvidence); this.save(caseEvidence);
evidence.setId(caseEvidence.getId()); evidence.setId(caseEvidence.getId());
for (EvidenceFileDTO evidenceFileDTO : evidence.getFileList()) { for (EvidenceFileDTO evidenceFileDTO : evidence.getFileList()) {
if (evidenceFileDTO.getUpdateStatus().equals("1")){ if (StrUtil.equalsAny(evidenceFileDTO.getUpdateStatus(),"1","2")){
// 新增 // 新增
EvidenceFile evidenceFile = new EvidenceFile(caseEvidence.getId(), evidenceFileDTO.getFileId()); EvidenceFile evidenceFile = new EvidenceFile(caseEvidence.getId(), evidenceFileDTO.getFileId());
evidenceFile.setDirectoryId(evidence.getDirectoryId()); evidenceFile.setDirectoryId(evidence.getDirectoryId());
@ -490,6 +495,17 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
}*/ }*/
} }
}else if (evidence.getUpdateStatus().equals("-1")){
for (EvidenceFileDTO evidenceFileDTO : evidence.getFileList()) {
if (StrUtil.equalsAny(evidenceFileDTO.getUpdateStatus(),"1","2")){
// 新增
EvidenceFile evidenceFile = new EvidenceFile(evidence.getId(), evidenceFileDTO.getFileId());
evidenceFile.setDirectoryId(evidence.getDirectoryId());
evidenceFile.setRank(evidenceFileDTO.getRank());
evidenceFile.setBatchNo(batchNo);
evidenceFileService.save(evidenceFile);
}
}
}/*else if (evidence.getUpdateStatus().equals("0")){ }/*else if (evidence.getUpdateStatus().equals("0")){
// 删除 // 删除
this.removeById(evidence.getId()); this.removeById(evidence.getId());
@ -667,6 +683,21 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
return directory1; return directory1;
} }
/**
 * Submits every given evidence file for synchronous recognition, one file per
 * submission, picking the recognition task from the file type:
 * {@code doc}/{@code docx} use the Word reading task, {@code pdf} uses the PDF
 * reading task, and any other type (including a missing type) uses the OCR task.
 * The type comparison is an exact, case-sensitive match.
 *
 * @param evidenceFileList files to submit
 */
@Override
@Transactional(rollbackFor = Exception.class, propagation= Propagation.REQUIRES_NEW, transactionManager = "dataSourceTransactionManager")
public void preSyncSubmitOCR(List<EvidenceFileDTO> evidenceFileList) {
    for (EvidenceFileDTO file : evidenceFileList) {
        String fileType = file.getFileType();

        // Word documents: text is read directly from the document.
        if (StrUtil.equalsAny(fileType, "doc", "docx")) {
            fileOcrProcessService.syncSubmitOCR(List.of(file.getFileId()), fileOcrProcessService::doWordCRTask);
            continue;
        }

        // PDF documents: text is read directly from the PDF.
        if (StrUtil.equalsAny(fileType, "pdf")) {
            fileOcrProcessService.syncSubmitOCR(List.of(file.getFileId()), fileOcrProcessService::doPdfCRTask);
            continue;
        }

        // Everything else falls back to the regular OCR task.
        fileOcrProcessService.syncSubmitOCR(List.of(file.getFileId()), fileOcrProcessService::doOCRTask);
    }
}
private CaseEvidenceDetailDTO findEvidenceDetail(String evidenceId, List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOS) { private CaseEvidenceDetailDTO findEvidenceDetail(String evidenceId, List<CaseEvidenceDetailDTO> caseEvidenceDetailDTOS) {
for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : caseEvidenceDetailDTOS) { for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : caseEvidenceDetailDTOS) {
@ -677,8 +708,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
return null; return null;
} }
private List<CaseEvidenceDetailDTO> toCaseCaseEvidenceDetailDTO(List<EvidenceDirectoryDTO> newDirectoryDTOS, private List<CaseEvidenceDetailDTO> toCaseCaseEvidenceDetailDTO(List<EvidenceDirectoryDTO> newDirectoryDTOS,List<EvidenceDirectoryDTO> oldEvidenceDirectoryDTOS) {
List<EvidenceDirectoryDTO> evidenceDirectoryDTOS) {
if (CollUtil.isEmpty(newDirectoryDTOS)){ if (CollUtil.isEmpty(newDirectoryDTOS)){
return new ArrayList<>(); return new ArrayList<>();
} }
@ -692,7 +722,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
for (EvidenceDirectoryDTO evidenceFile : floatNewDirectoryDTOS) { for (EvidenceDirectoryDTO evidenceFile : floatNewDirectoryDTOS) {
String directoryId = evidenceFile.getId(); String directoryId = evidenceFile.getId();
EvidenceDirectoryDTO directory = evidenceDirectoryService.findDirectoryById(directoryId, evidenceDirectoryDTOS); EvidenceDirectoryDTO directory = evidenceDirectoryService.findDirectoryById(directoryId, oldEvidenceDirectoryDTOS);
if (null == directory){ if (null == directory){
log.warn("toCaseCaseEvidenceDetailDTO:目录id{}不存在对应的目录分类信息",directoryId); log.warn("toCaseCaseEvidenceDetailDTO:目录id{}不存在对应的目录分类信息",directoryId);
continue; continue;
@ -701,20 +731,28 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
if (directory.getLevel() == 1 || directory.getLevel() == 2){ if (directory.getLevel() == 1 || directory.getLevel() == 2){
for (String fileId : evidenceFile.getFileIdList()) { for (String fileId : evidenceFile.getFileIdList()) {
CaseEvidenceDetailDTO caseEvidenceDetailDTO = new CaseEvidenceDetailDTO(); CaseEvidenceDetailDTO caseEvidenceDetailDTO = new CaseEvidenceDetailDTO();
if (null != fileMap.get(fileId)){
// 证据名为文件名
caseEvidenceDetailDTO.setEvidenceName(fileMap.get(fileId).getFilename());
}
caseEvidenceDetailDTO.setEvidenceType(directory.getCategoryId()); caseEvidenceDetailDTO.setEvidenceType(directory.getCategoryId());
caseEvidenceDetailDTO.setDirectoryId(directoryId); caseEvidenceDetailDTO.setDirectoryId(directoryId);
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO(); EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(fileId); evidenceFileDTO.setFileId(fileId);
MinioFile minioFile = fileMap.get(fileId);
if (null != minioFile){
// 证据名为文件名
caseEvidenceDetailDTO.setEvidenceName(minioFile.getFilename());
evidenceFileDTO.setFileType(minioFile.getFileType());
evidenceFileDTO.setFileName(minioFile.getFilename());
}
caseEvidenceDetailDTO.setFileList(List.of(evidenceFileDTO)); caseEvidenceDetailDTO.setFileList(List.of(evidenceFileDTO));
caseEvidenceDetailDTOS.add(caseEvidenceDetailDTO); caseEvidenceDetailDTOS.add(caseEvidenceDetailDTO);
} }
} }
if (directory.getLevel() == 3){ if (directory.getLevel() == 3){
CaseEvidenceDetailDTO caseEvidenceDetailDTO = new CaseEvidenceDetailDTO(); CaseEvidenceDetailDTO caseEvidenceDetailDTO = new CaseEvidenceDetailDTO();
if (CollUtil.isNotEmpty(directory.getFileInfoList())){
String evidenceId = CollUtil.getFirst(directory.getFileInfoList()).getEvidenceId();
caseEvidenceDetailDTO.setId(evidenceId);
}
// 证据名为目录名 // 证据名为目录名
caseEvidenceDetailDTO.setEvidenceName(evidenceFile.getDirectoryName()); caseEvidenceDetailDTO.setEvidenceName(evidenceFile.getDirectoryName());
caseEvidenceDetailDTO.setEvidenceType(directory.getCategoryId()); caseEvidenceDetailDTO.setEvidenceType(directory.getCategoryId());
@ -723,6 +761,12 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
for (String fileId : evidenceFile.getFileIdList()) { for (String fileId : evidenceFile.getFileIdList()) {
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO(); EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(fileId); evidenceFileDTO.setFileId(fileId);
MinioFile minioFile = fileMap.get(fileId);
if (null != minioFile){
evidenceFileDTO.setFileName(minioFile.getFilename());
evidenceFileDTO.setFileType(minioFile.getFileType());
}
evidenceFileDTOS.add(evidenceFileDTO); evidenceFileDTOS.add(evidenceFileDTO);
} }
caseEvidenceDetailDTO.setFileList(evidenceFileDTOS); caseEvidenceDetailDTO.setFileList(evidenceFileDTOS);
@ -772,6 +816,11 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
if (CollUtil.isNotEmpty(oldEvidenceList) && CollUtil.isNotEmpty(newEvidenceFileList)){ if (CollUtil.isNotEmpty(oldEvidenceList) && CollUtil.isNotEmpty(newEvidenceFileList)){
// 数据库中和新数据都存在 // 数据库中和新数据都存在
Map<String, EvidenceFileDTO> fileCache = Stream.of(newEvidenceFileList, oldEvidenceList)
.flatMap(Collection::stream)
.flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream())
.collect(Collectors.toMap(EvidenceFileDTO::getFileId, Function.identity()));
for (CaseEvidenceDetailDTO oldEvidence : oldEvidenceList) { for (CaseEvidenceDetailDTO oldEvidence : oldEvidenceList) {
boolean isFind = false; boolean isFind = false;
for (CaseEvidenceDetailDTO newEvidence : newEvidenceFileList) { for (CaseEvidenceDetailDTO newEvidence : newEvidenceFileList) {
@ -780,7 +829,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
oldEvidence.setUpdateStatus("-1"); oldEvidence.setUpdateStatus("-1");
List<String> oldFileIds = oldEvidence.getFileList().stream().map(EvidenceFileDTO::getFileId).toList(); List<String> oldFileIds = oldEvidence.getFileList().stream().map(EvidenceFileDTO::getFileId).toList();
List<String> newFileIds = newEvidence.getFileList().stream().map(EvidenceFileDTO::getFileId).toList(); List<String> newFileIds = newEvidence.getFileList().stream().map(EvidenceFileDTO::getFileId).toList();
TupleIdRecord tupleIdRecord = compareFileList(oldFileIds, newFileIds); TupleIdRecord tupleIdRecord = compareFileList(newFileIds,oldFileIds);
List<EvidenceFileDTO> updateFileList = new ArrayList<>(); List<EvidenceFileDTO> updateFileList = new ArrayList<>();
// 新增的文件顺序排在原有文件的后面 // 新增的文件顺序排在原有文件的后面
@ -789,6 +838,10 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO(); EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(addFileId); evidenceFileDTO.setFileId(addFileId);
evidenceFileDTO.setUpdateStatus("1"); evidenceFileDTO.setUpdateStatus("1");
if (null != fileCache.get(addFileId)){
evidenceFileDTO.setFileType(fileCache.get(addFileId).getFileType());
}
evidenceFileDTO.setRank(initOrder + findRank(newFileIds, addFileId)); evidenceFileDTO.setRank(initOrder + findRank(newFileIds, addFileId));
updateFileList.add(evidenceFileDTO); updateFileList.add(evidenceFileDTO);
} }
@ -796,6 +849,9 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO(); EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(deleteFileId); evidenceFileDTO.setFileId(deleteFileId);
evidenceFileDTO.setUpdateStatus("0"); evidenceFileDTO.setUpdateStatus("0");
if (null != fileCache.get(deleteFileId)){
evidenceFileDTO.setFileType(fileCache.get(deleteFileId).getFileType());
}
evidenceFileDTO.setRank(findRank(oldFileIds, deleteFileId)); evidenceFileDTO.setRank(findRank(oldFileIds, deleteFileId));
updateFileList.add(evidenceFileDTO); updateFileList.add(evidenceFileDTO);
} }
@ -804,6 +860,9 @@ public class CaseEvidenceServiceImpl extends ServiceImpl<CaseEvidenceMapper, Cas
EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO(); EvidenceFileDTO evidenceFileDTO = new EvidenceFileDTO();
evidenceFileDTO.setFileId(updateFileId); evidenceFileDTO.setFileId(updateFileId);
evidenceFileDTO.setUpdateStatus("2"); evidenceFileDTO.setUpdateStatus("2");
if (null != fileCache.get(updateFileId)){
evidenceFileDTO.setFileType(fileCache.get(updateFileId).getFileType());
}
evidenceFileDTO.setRank(findRank(oldFileIds, updateFileId)); evidenceFileDTO.setRank(findRank(oldFileIds, updateFileId));
updateFileList.add(evidenceFileDTO); updateFileList.add(evidenceFileDTO);
} }

@ -1,6 +1,7 @@
package com.supervision.police.service.impl; package com.supervision.police.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil; import cn.hutool.json.JSONUtil;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
@ -17,6 +18,8 @@ import com.supervision.police.service.FileOcrProcessService;
import com.supervision.police.mapper.FileOcrProcessMapper; import com.supervision.police.mapper.FileOcrProcessMapper;
import com.supervision.police.service.NoteRecordService; import com.supervision.police.service.NoteRecordService;
import com.supervision.police.service.OCRService; import com.supervision.police.service.OCRService;
import com.supervision.utils.PDFReadUtil;
import com.supervision.utils.WordReadUtil;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.aop.framework.AopContext; import org.springframework.aop.framework.AopContext;
@ -28,6 +31,7 @@ import org.springframework.transaction.annotation.Transactional;
import java.util.*; import java.util.*;
import java.util.function.Consumer; import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** /**
@ -61,6 +65,11 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
return submitOCR(fileIdList, this::doOCRTask); return submitOCR(fileIdList, this::doOCRTask);
} }
/**
 * Submits the given files for recognition, running the caller-supplied task
 * instead of the default OCR task.
 *
 * @param fileIdList ids of the files to submit
 * @param consumer   task invoked with the created process records
 * @return the process records produced by the submission
 */
@Override
public List<FileOcrProcess> syncSubmitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer) {
    List<FileOcrProcess> submitted = this.submitOCR(fileIdList, consumer);
    return submitted;
}
private List<FileOcrProcess> submitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer){ private List<FileOcrProcess> submitOCR(List<String> fileIdList, Consumer<List<FileOcrProcess>> consumer){
if (CollUtil.isEmpty(fileIdList)){ if (CollUtil.isEmpty(fileIdList)){
@ -91,7 +100,8 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
log.debug("submitOCR:提交识别任务到异步处理器中..."); log.debug("submitOCR:提交识别任务到异步处理器中...");
consumer.accept(processList); consumer.accept(processList);
return super.lambdaQuery().in(FileOcrProcess::getFileId, fileIdList).list(); List<FileOcrProcess> resultList = super.lambdaQuery().in(FileOcrProcess::getFileId, fileIdList).list();
return sortByIdOrder(fileIdList,resultList, FileOcrProcess::getFileId);
} }
@Override @Override
@ -186,7 +196,7 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
@Override @Override
public List<RecordFileDTO> queryFileListWithIdSort(List<String> fileIdList) { public List<RecordFileDTO> queryFileListWithIdSort(List<String> fileIdList) {
List<RecordFileDTO> recordFileDTOS = this.queryFileList(fileIdList); List<RecordFileDTO> recordFileDTOS = this.queryFileList(fileIdList);
return sortByIdOrder(fileIdList, recordFileDTOS); return sortByIdOrder(fileIdList, recordFileDTOS, RecordFileDTO::getFileId);
} }
@Override @Override
@ -215,6 +225,44 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
return this.queryFileListWithIdSort(fileIdList); return this.queryFileListWithIdSort(fileIdList);
} }
/**
 * Recognition task for Word files: the text of each file is obtained through
 * {@code WordReadUtil.readWordInMinio}.
 *
 * @param fileOcrProcesses process records of the files to read
 */
@Override
public void doWordCRTask(List<FileOcrProcess> fileOcrProcesses) {
    Function<String, String> wordReader = id -> WordReadUtil.readWordInMinio(minioService, id);
    doMcr(fileOcrProcesses, wordReader);
}
/**
 * Recognition task for PDF files: the text of each file is obtained through
 * {@code PDFReadUtil.readPdfInMinio}.
 *
 * @param fileOcrProcesses process records of the files to read
 */
@Override
public void doPdfCRTask(List<FileOcrProcess> fileOcrProcesses) {
    Function<String, String> pdfReader = id -> PDFReadUtil.readPdfInMinio(minioService, id);
    doMcr(fileOcrProcesses, pdfReader);
}
/**
 * Runs text extraction for each process record using the supplied reader.
 * <p>
 * Per record: the status is first set to PROCESSING, the reader is applied to
 * the file id, and the resulting text is stored on the record. A {@code null}
 * result or any exception marks the record as FAIL; the loop then continues
 * with the next record.
 *
 * @param fileOcrProcesses process records to handle; nothing is done when empty
 * @param function         maps a file id to the extracted text
 */
private void doMcr(List<FileOcrProcess> fileOcrProcesses, Function<String, String> function) {
    log.info("doMcr:开始识别文件...{}",JSONUtil.toJsonStr(fileOcrProcesses));
    if (CollUtil.isEmpty(fileOcrProcesses)){
        log.info("doMcr:当前暂无识别的任务,结束...");
        return;
    }

    for (FileOcrProcess current : fileOcrProcesses) {
        log.info("ocr:开始识别文件:{}", JSONUtil.toJsonStr(current));
        String fileId = current.getFileId();
        this.updateOCrStatus(List.of(fileId), OcrProcessStatus.PROCESSING.getCode());
        try {
            String recognizedText = function.apply(fileId);
            // A null result is treated as a failure and handled by the catch below.
            Assert.notNull(recognizedText, "识别结果为空");
            // NOTE(review): the status written together with the text is PROCESSING again,
            // so a successfully read file never leaves the processing state here —
            // presumably a success status was intended; confirm against doOCRTask.
            this.lambdaUpdate()
                    .eq(FileOcrProcess::getFileId, fileId)
                    .set(FileOcrProcess::getStatus, OcrProcessStatus.PROCESSING.getCode())
                    .set(FileOcrProcess::getOcrText, recognizedText)
                    .update();
        } catch (Exception e) {
            log.error("doMcr识别失败",e);
            this.updateOCrStatus(List.of(fileId), OcrProcessStatus.FAIL.getCode());
        }
    }
}
private List<OCRReqDTO> buildOCRReqDTO(List<String> fileIdList){ private List<OCRReqDTO> buildOCRReqDTO(List<String> fileIdList){
List<MinioFile> minioFiles = minioService.listMinioFile(fileIdList); List<MinioFile> minioFiles = minioService.listMinioFile(fileIdList);
@ -225,15 +273,15 @@ public class FileOcrProcessServiceImpl extends ServiceImpl<FileOcrProcessMapper,
} }
private List<RecordFileDTO> sortByIdOrder(List<String> fileIdList,List<RecordFileDTO> recordFileDTOList){ private <T> List<T> sortByIdOrder(List<String> idList,List<T> targetList, Function<T,String> function){
if (CollUtil.size(fileIdList) < 2 || CollUtil.size(recordFileDTOList) < 2) { if (CollUtil.size(idList) < 2 || CollUtil.size(targetList) < 2) {
return recordFileDTOList; return targetList;
} }
Map<String, RecordFileDTO> recordFileDTOMap = recordFileDTOList.stream() Map<String, T> targetMap = targetList.stream()
.collect(Collectors.toMap(RecordFileDTO::getFileId,recordFileDTO -> recordFileDTO, (k1, k2) -> k1)); .collect(Collectors.toMap(function,target -> target, (k1, k2) -> k1));
return fileIdList.stream().map(recordFileDTOMap::get).filter(Objects::nonNull).collect(Collectors.toList()); return idList.stream().map(targetMap::get).filter(Objects::nonNull).collect(Collectors.toList());
} }
} }

@ -73,7 +73,7 @@ public class LLMExtractServiceImpl implements LLMExtractService {
//判断content是否为JSON格式如果是则尝试转换为JSON格式并获取title属性的值 //判断content是否为JSON格式如果是则尝试转换为JSON格式并获取title属性的值
try { try {
JSONObject jsonObject = new JSONObject(content); JSONObject jsonObject = new JSONObject(content);
String title = jsonObject.getString("title"); String title = jsonObject.optString("title");
ocrExtractDto.setTitle(title); ocrExtractDto.setTitle(title);
result.add(ocrExtractDto); result.add(ocrExtractDto);
} catch (Exception e) { } catch (Exception e) {
@ -155,7 +155,7 @@ public class LLMExtractServiceImpl implements LLMExtractService {
log.info("属性提取结果: {}", jsonObject); log.info("属性提取结果: {}", jsonObject);
List<NotePromptExtractAttributesDto> extractAttributesList = LLMExtractDto.getExtractAttributes(); List<NotePromptExtractAttributesDto> extractAttributesList = LLMExtractDto.getExtractAttributes();
extractAttributesMap.forEach((key, value) -> { extractAttributesMap.forEach((key, value) -> {
String attrValue = jsonObject.getString(key); String attrValue = jsonObject.optString(key);
log.info("属性提取结果: 【{}】。Key【{}】", attrValue, key); log.info("属性提取结果: 【{}】。Key【{}】", attrValue, key);
NotePromptExtractAttributesDto extractAttributesDto = new NotePromptExtractAttributesDto(); NotePromptExtractAttributesDto extractAttributesDto = new NotePromptExtractAttributesDto();
extractAttributesDto.setAttrName(key); extractAttributesDto.setAttrName(key);

@ -1,5 +1,6 @@
package com.supervision.utils; package com.supervision.utils;
import com.supervision.minio.service.MinioService;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.PDFTextStripper;
@ -20,4 +21,14 @@ public class PDFReadUtil {
} }
return text; return text;
} }
/**
 * Opens the object stored under {@code fileId} via the given MinIO service and
 * passes its stream to {@code pdf2text}.
 *
 * @param minioService service used to open the object stream
 * @param fileId       id of the stored file
 * @return the value produced by {@code pdf2text}, or {@code null} when opening,
 *         reading or closing the stream throws (the exception is logged)
 */
public static String readPdfInMinio(MinioService minioService, String fileId) {
    String text;
    try (InputStream pdfStream = minioService.getObjectInputStream(fileId)) {
        text = pdf2text(pdfStream);
    } catch (Exception e) {
        log.error("从minio中获取文件失败", e);
        // Any failure, including one raised while closing the stream, yields null.
        text = null;
    }
    return text;
}
} }

@ -32,7 +32,7 @@
c.category_name as categoryName, c.category_name as categoryName,
c.icon_url as iconURL, c.icon_url as iconURL,
c.parent_id as parent_id, c.parent_id as parent_id,
np.name as name np.name as promptName
from evidence_category c from evidence_category c
left join note_prompt np on c.prompt_id = np.id left join note_prompt np on c.prompt_id = np.id
<where> <where>

Loading…
Cancel
Save