From 676fc2e06e197eddf53e5e448bbff613d95347f2 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Fri, 25 Oct 2024 09:22:04 +0800 Subject: [PATCH] =?UTF-8?q?1.=20=E4=BF=AE=E5=A4=8D=E8=AF=81=E6=8D=AE?= =?UTF-8?q?=E6=94=B9=E9=80=A0bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../police/dto/EvidenceDirectoryDTO.java | 20 +++ .../police/dto/EvidenceVerifyDTO.java | 4 + .../police/service/CaseEvidenceService.java | 7 +- .../police/service/FileOcrProcessService.java | 8 ++ .../service/impl/CaseEvidenceServiceImpl.java | 114 +++++++++--------- .../impl/FileOcrProcessServiceImpl.java | 12 ++ .../resources/mapper/EvidenceFileMapper.xml | 1 + 7 files changed, 100 insertions(+), 66 deletions(-) diff --git a/src/main/java/com/supervision/police/dto/EvidenceDirectoryDTO.java b/src/main/java/com/supervision/police/dto/EvidenceDirectoryDTO.java index 057b3e6..c34e1db 100644 --- a/src/main/java/com/supervision/police/dto/EvidenceDirectoryDTO.java +++ b/src/main/java/com/supervision/police/dto/EvidenceDirectoryDTO.java @@ -51,6 +51,10 @@ public class EvidenceDirectoryDTO { */ private List fileInfoList = new ArrayList<>(); + /** + * Evidence info, exposed for the front end. Empty when level is 1 or 2; non-empty at level 3, + * where it is kept consistent with fileInfoList[evidenceInfo] + */ private CaseEvidence evidenceInfo; private int level = -1; @@ -241,4 +245,20 @@ public class EvidenceDirectoryDTO { } + /** + * Re-places this directory's evidence info onto each of its files, then recurses into child directories + */ + public void rePlaceEvidence(){ + if (CollUtil.isNotEmpty(this.fileInfoList) && null != this.evidenceInfo){ + for (EvidenceFileDTO evidenceFileDTO : this.fileInfoList) { + evidenceFileDTO.setEvidenceInfo(evidenceInfo); + } + } + if (CollUtil.isNotEmpty(this.child)){ + for (EvidenceDirectoryDTO directoryDTO : this.child) { + directoryDTO.rePlaceEvidence(); + } + } + } + } diff --git a/src/main/java/com/supervision/police/dto/EvidenceVerifyDTO.java b/src/main/java/com/supervision/police/dto/EvidenceVerifyDTO.java index 7229b05..5c634fb 100644 --- 
a/src/main/java/com/supervision/police/dto/EvidenceVerifyDTO.java +++ b/src/main/java/com/supervision/police/dto/EvidenceVerifyDTO.java @@ -19,6 +19,10 @@ public class EvidenceVerifyDTO { private String title; + private String provider; + + private String evidenceName; + private List properties; private String directoryId; diff --git a/src/main/java/com/supervision/police/service/CaseEvidenceService.java b/src/main/java/com/supervision/police/service/CaseEvidenceService.java index 2b0eae6..934f240 100644 --- a/src/main/java/com/supervision/police/service/CaseEvidenceService.java +++ b/src/main/java/com/supervision/police/service/CaseEvidenceService.java @@ -143,11 +143,6 @@ public interface CaseEvidenceService extends IService { EvidenceDirectory createDirectory(EvidenceDirectoryReqVO evidenceDirectory); - /** - * 预处理ocr任务 note:这个方法用来提前处理 word 、 pfd 类型的数据。是一个新的事务不会与前面的事务保持原子性 - * 异步调用 - * @param evidenceFileList 文件信息列表 - */ - void preAsyncSubmitOCR(List evidenceFileList); + void syncEvidenceAnalysis(List evidenceList); } diff --git a/src/main/java/com/supervision/police/service/FileOcrProcessService.java b/src/main/java/com/supervision/police/service/FileOcrProcessService.java index b8e1782..d4491d8 100644 --- a/src/main/java/com/supervision/police/service/FileOcrProcessService.java +++ b/src/main/java/com/supervision/police/service/FileOcrProcessService.java @@ -57,4 +57,12 @@ public interface FileOcrProcessService extends IService { void doPdfCRTask(List fileOcrProcesses); + + /** + * Runs text recognition for the given files, choosing the recognition task by file type + * @param fileIds ids of the files to process + * @param fileType file type: "doc"/"docx" use the Word task, "pdf" the PDF task, anything else the OCR task + */ + List multipleTypeOcrProcess(List fileIds,String fileType); + } diff --git a/src/main/java/com/supervision/police/service/impl/CaseEvidenceServiceImpl.java b/src/main/java/com/supervision/police/service/impl/CaseEvidenceServiceImpl.java index 5b15b3e..55baa09 100644 --- a/src/main/java/com/supervision/police/service/impl/CaseEvidenceServiceImpl.java +++ 
b/src/main/java/com/supervision/police/service/impl/CaseEvidenceServiceImpl.java @@ -246,17 +246,21 @@ public class CaseEvidenceServiceImpl extends ServiceImpl evidenceFiles = evidenceFileService.lambdaQuery().eq(EvidenceFile::getEvidenceId, evidenceId).list(); + List evidenceFiles = evidenceFileService.listFileInfo(List.of(evidenceId)); if (evidenceFiles.isEmpty()) { log.error("证据id:【{}】不存在证据文件", evidenceId); return; } try { // 根据rank升序排序 - evidenceFiles.sort(Comparator.comparing(EvidenceFile::getRank)); - log.info("OCR识别开始。证据文件ID:【{}】", evidenceFiles.stream().map(EvidenceFile::getFileId).toList()); + evidenceFiles.sort(Comparator.comparing(EvidenceFileDTO::getRank)); + log.info("OCR识别开始。证据文件ID:【{}】", evidenceFiles.stream().map(EvidenceFileDTO::getFileId).toList()); long ocrStart = System.currentTimeMillis(); - List fileOcrProcesses = fileOcrProcessService.syncSubmitOCR(evidenceFiles.stream().map(EvidenceFile::getFileId).toList()); + List fileOcrProcesses = new ArrayList<>(); + for (EvidenceFileDTO evidenceFile : evidenceFiles) { + List fileOcrProcess = fileOcrProcessService.multipleTypeOcrProcess(List.of(evidenceFile.getFileId()), evidenceFile.getFileType()); + fileOcrProcesses.addAll(fileOcrProcess); + } log.info("OCR识别完成。更新证据处理状态为【OCR识别完成】。耗时:【{}】ms", System.currentTimeMillis() - ocrStart); caseEvidence.setProcessStatus(EvidenceConstants.PROCESS_STATUS_OCR_OK); updateById(caseEvidence); @@ -443,43 +447,47 @@ public class CaseEvidenceServiceImpl extends ServiceImpl evidenceFileDTOList = operationalEvidenceList.stream().filter( - evidenceDetail-> !evidenceDetail.getFileList().stream().allMatch(file -> StrUtil.equals(file.getUpdateStatus(), "1")) - ) - .flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream().filter(file -> StrUtil.equals(file.getUpdateStatus(), "1"))).toList(); - ((CaseEvidenceService) AopContext.currentProxy()).preAsyncSubmitOCR(evidenceFileDTOList); - - for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : operationalEvidenceList) 
{ - if (StrUtil.equalsAny(caseEvidenceDetailDTO.getUpdateStatus(), "-1", "1")) { - boolean match = caseEvidenceDetailDTO.getFileList().stream().allMatch(file -> StrUtil.equals(file.getUpdateStatus(), "1")); - if (match) { - // 需要分析(ocr识别+标题提取) - log.info("ocrAndExtract:开始调用evidenceAnalysis接口...证据id:{},证据名:{}", caseEvidenceDetailDTO.getId(), caseEvidenceDetailDTO.getEvidenceName()); - xxlJobService.executeTaskByJobHandler("evidenceAnalysis", caseEvidenceDetailDTO.getId()); - } else { - log.info("ocrAndExtract:单独识别ocr数据"); - boolean success = false; - for (EvidenceFileDTO evidenceFileDTO : caseEvidenceDetailDTO.getFileList()) { - success = ocrEvidenceService.submitOrcTask(evidenceFileDTO.getFileId()); - if (!success) { - this.lambdaUpdate().eq(CaseEvidence::getId, evidenceFileDTO.getEvidenceId()) - .set(CaseEvidence::getProcessStatus, EvidenceConstants.PROCESS_STATUS_FAILED).update(); - break; - } - } + syncEvidenceAnalysis(operationalEvidenceList); - if (success) { - this.lambdaUpdate().eq(CaseEvidence::getId, caseEvidenceDetailDTO.getId()) - .set(CaseEvidence::getProcessStatus, EvidenceConstants.PROCESS_STATUS_SUCCESS).update(); - } + return batchId; - } + } - } + @Async + public void syncEvidenceAnalysis(List evidenceList) { + List ocrFileDTOList = evidenceList.stream().filter( + // not every file under this evidence is newly added (update status "1" or "-1") + evidenceDetail-> !evidenceDetail.getFileList().stream().allMatch(file -> StrUtil.equalsAny(file.getUpdateStatus(), "1", "-1")) + ) + .flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream().filter(file -> StrUtil.equals(file.getUpdateStatus(), "1"))).toList(); + if (CollUtil.isNotEmpty(ocrFileDTOList)){ + // files appended to an already existing directory only need their content recognized again + log.info("ocrAndExtract:开始只进行文件内容识别..."); + Map> evidenceMap = ocrFileDTOList.stream().collect(Collectors.groupingBy(EvidenceFileDTO::getEvidenceId)); + for (Map.Entry> entry : evidenceMap.entrySet()) { + String evidenceId = entry.getKey(); + List value = entry.getValue(); + List fileOcrProcesses = 
value.stream().flatMap(file -> fileOcrProcessService.multipleTypeOcrProcess(List.of(file.getFileId()), file.getFileType()).stream()).toList(); + boolean match = fileOcrProcesses.stream().anyMatch(fileOcrProcess -> Integer.valueOf(2).equals(fileOcrProcess.getStatus())); + this.lambdaUpdate().eq(CaseEvidence::getId, evidenceId) + .set(CaseEvidence::getProcessStatus, + match ? EvidenceConstants.PROCESS_STATUS_FAILED : EvidenceConstants.PROCESS_STATUS_SUCCESS).update(); + } } - return batchId; + Set evidenceIds = evidenceList.stream().filter( + // every file under this evidence is newly added (update status "1" or "-1") + evidenceDetail -> evidenceDetail.getFileList().stream().allMatch(file -> StrUtil.equalsAny(file.getUpdateStatus(), "1", "-1")) + ) + .flatMap(evidenceDetailDTO -> evidenceDetailDTO.getFileList().stream().filter(file -> StrUtil.equals(file.getUpdateStatus(), "1")) + .map(EvidenceFileDTO::getEvidenceId)).collect(Collectors.toSet()); + + for (String evidenceId : evidenceIds) { + // all files in the directory are new, so the evidence has to be extracted and analysed again + log.info("ocrAndExtract:证据:{} 进行证据分析操作....", evidenceId); + xxlJobService.executeTaskByJobHandler("evidenceAnalysis", evidenceId); + } } /** @@ -502,11 +510,13 @@ public class CaseEvidenceServiceImpl extends ServiceImpl fileInfoList = directoryDTO.getFileInfoList(); - if (CollUtil.isNotEmpty(fileInfoList) && evidenceInfo != null) { - for (EvidenceFileDTO fileInfo : fileInfoList) { - fileInfo.setEvidenceInfo(evidenceInfo); - } - } - } + verifyEvidenceReqVO.getEvidenceDirectoryList().forEach(EvidenceDirectoryDTO::rePlaceEvidence); List evidenceVerifyDTOS = evidenceDirectoryList.stream() .flatMap(evidenceDirectoryDTO -> evidenceDirectoryDTO.listAllFile().stream()) @@ -634,6 +641,8 @@ public class CaseEvidenceServiceImpl extends ServiceImpl evidenceFileList) { - - for (EvidenceFileDTO evidenceFileDTO : evidenceFileList) { - if (StrUtil.equalsAny(evidenceFileDTO.getFileType(), "doc", "docx")) { - fileOcrProcessService.syncSubmitOCR(List.of(evidenceFileDTO.getFileId()), fileOcrProcessService::doWordCRTask); - } else if 
(StrUtil.equalsAny(evidenceFileDTO.getFileType(), "pdf")) { - fileOcrProcessService.syncSubmitOCR(List.of(evidenceFileDTO.getFileId()), fileOcrProcessService::doPdfCRTask); - } else { - fileOcrProcessService.syncSubmitOCR(List.of(evidenceFileDTO.getFileId()), fileOcrProcessService::doOCRTask); - } - } - } - private CaseEvidenceDetailDTO findEvidenceDetail(String evidenceId, List caseEvidenceDetailDTOS) { for (CaseEvidenceDetailDTO caseEvidenceDetailDTO : caseEvidenceDetailDTOS) { @@ -833,6 +826,7 @@ public class CaseEvidenceServiceImpl extends ServiceImpl PDFReadUtil.readPdfInMinio(minioService, fileId)); } + @Override + public List multipleTypeOcrProcess(List fileIds, String fileType) { + + if (StrUtil.equalsAny(fileType, "doc", "docx")){ + return this.syncSubmitOCR(fileIds, this::doWordCRTask); + }else if (StrUtil.equalsAny(fileType, "pdf")){ + return this.syncSubmitOCR(fileIds, this::doPdfCRTask); + }else { + return this.syncSubmitOCR(fileIds, this::doOCRTask); + } + } + private void doMcr(List fileOcrProcesses, Function function) { log.info("doMcr:开始识别文件...{}",JSONUtil.toJsonStr(fileOcrProcesses)); diff --git a/src/main/resources/mapper/EvidenceFileMapper.xml b/src/main/resources/mapper/EvidenceFileMapper.xml index 11c37d4..1fed88c 100644 --- a/src/main/resources/mapper/EvidenceFileMapper.xml +++ b/src/main/resources/mapper/EvidenceFileMapper.xml @@ -28,6 +28,7 @@ select ef.file_id as fileId, ef.evidence_id as evidenceId, + ef.`rank` as `rank`, mf.filename as filename, mf.file_type as fileType, op.ocr_text as ocrText