diff --git a/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java b/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java index 3b84efd..b151441 100644 --- a/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java +++ b/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java @@ -264,13 +264,24 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl batchSaveRecordSplit(NoteRecord record, String fileId) { - String context = WordReadUtil.readWordInMinio(minioService, fileId); + String context; + MinioFile minioFile = minioService.getMinioFile(fileId); + if (StrUtil.equalsAnyIgnoreCase(minioFile.getFileType(),"doc","docx")){ + context = WordReadUtil.readWordInMinio(minioService, fileId); + }else { + // 这段代码是为了适应ocr重新提取 + List recordFileDTOS = fileOcrProcessService.queryFileList(List.of(fileId)); + if (CollUtil.isEmpty(recordFileDTOS)){ + log.error("文件id:{}未查询到ocr数据,不进行解析...", fileId); + return new ArrayList<>(); + } + RecordFileDTO recordFileDTO = CollUtil.getFirst(recordFileDTOS); + context = StrUtil.isNotEmpty(recordFileDTO.getReviseText()) ? recordFileDTO.getReviseText() : recordFileDTO.getOcrText(); + } if (StrUtil.isEmpty(context)) { log.info("文件id:{}内容为空,不进行解析...", fileId); return new ArrayList<>(); } - - MinioFile minioFile = minioService.getMinioFile(fileId); List qaList = RecordRegexUtil.recordRegex(context, record.getName()); log.info("文件:{}拆分问答对:{}", minioFile.getFilename(), qaList.size());