|
|
@ -264,13 +264,24 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl<NoteRecordSplitMappe
|
|
|
|
@Override
|
|
|
|
@Override
|
|
|
|
public List<NoteRecordSplit> batchSaveRecordSplit(NoteRecord record, String fileId) {
|
|
|
|
public List<NoteRecordSplit> batchSaveRecordSplit(NoteRecord record, String fileId) {
|
|
|
|
|
|
|
|
|
|
|
|
String context = WordReadUtil.readWordInMinio(minioService, fileId);
|
|
|
|
String context;
|
|
|
|
|
|
|
|
MinioFile minioFile = minioService.getMinioFile(fileId);
|
|
|
|
|
|
|
|
if (StrUtil.equalsAnyIgnoreCase(minioFile.getFileType(),"doc","docx")){
|
|
|
|
|
|
|
|
context = WordReadUtil.readWordInMinio(minioService, fileId);
|
|
|
|
|
|
|
|
}else {
|
|
|
|
|
|
|
|
// Non-Word files: take the text from the stored OCR result so that content re-extracted by OCR can be split again
|
|
|
|
|
|
|
|
List<RecordFileDTO> recordFileDTOS = fileOcrProcessService.queryFileList(List.of(fileId));
|
|
|
|
|
|
|
|
if (CollUtil.isEmpty(recordFileDTOS)){
|
|
|
|
|
|
|
|
log.error("文件id:{}未查询到ocr数据,不进行解析...", fileId);
|
|
|
|
|
|
|
|
return new ArrayList<>();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
RecordFileDTO recordFileDTO = CollUtil.getFirst(recordFileDTOS);
|
|
|
|
|
|
|
|
context = StrUtil.isNotEmpty(recordFileDTO.getReviseText()) ? recordFileDTO.getReviseText() : recordFileDTO.getOcrText();
|
|
|
|
|
|
|
|
}
|
|
|
|
if (StrUtil.isEmpty(context)) {
|
|
|
|
if (StrUtil.isEmpty(context)) {
|
|
|
|
log.info("文件id:{}内容为空,不进行解析...", fileId);
|
|
|
|
log.info("文件id:{}内容为空,不进行解析...", fileId);
|
|
|
|
return new ArrayList<>();
|
|
|
|
return new ArrayList<>();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
MinioFile minioFile = minioService.getMinioFile(fileId);
|
|
|
|
|
|
|
|
List<QARecordNodeDTO> qaList = RecordRegexUtil.recordRegex(context, record.getName());
|
|
|
|
List<QARecordNodeDTO> qaList = RecordRegexUtil.recordRegex(context, record.getName());
|
|
|
|
log.info("文件:{}拆分问答对:{}", minioFile.getFilename(), qaList.size());
|
|
|
|
log.info("文件:{}拆分问答对:{}", minioFile.getFilename(), qaList.size());
|
|
|
|
|
|
|
|
|
|
|
|