From bdb1c8558f2144954dbdc2ed6c8c2d35ad3ae025 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Tue, 11 Feb 2025 15:56:05 +0800 Subject: [PATCH] =?UTF-8?q?1.=20=E6=8C=87=E4=BB=A3=E6=B6=88=E9=99=A4?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../impl/NoteRecordSplitServiceImpl.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java b/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java index 3b84efd..b151441 100644 --- a/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java +++ b/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java @@ -264,13 +264,24 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl batchSaveRecordSplit(NoteRecord record, String fileId) { - String context = WordReadUtil.readWordInMinio(minioService, fileId); + String context; + MinioFile minioFile = minioService.getMinioFile(fileId); + if (StrUtil.equalsAnyIgnoreCase(minioFile.getFileType(),"doc","docx")){ + context = WordReadUtil.readWordInMinio(minioService, fileId); + }else { + // 这段代码是为了适应ocr重新提取 + List recordFileDTOS = fileOcrProcessService.queryFileList(List.of(fileId)); + if (CollUtil.isEmpty(recordFileDTOS)){ + log.error("文件id:{}未查询到ocr数据,不进行解析...", fileId); + return new ArrayList<>(); + } + RecordFileDTO recordFileDTO = CollUtil.getFirst(recordFileDTOS); + context = StrUtil.isNotEmpty(recordFileDTO.getReviseText()) ? recordFileDTO.getReviseText() : recordFileDTO.getOcrText(); + } if (StrUtil.isEmpty(context)) { log.info("文件id:{}内容为空,不进行解析...", fileId); return new ArrayList<>(); } - - MinioFile minioFile = minioService.getMinioFile(fileId); List qaList = RecordRegexUtil.recordRegex(context, record.getName()); log.info("文件:{}拆分问答对:{}", minioFile.getFilename(), qaList.size());