diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java index 5823a5a..02113e1 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java @@ -26,8 +26,10 @@ public class ErAttributeServiceImpl extends ServiceImpl erAttributes = this.listByDomainMetadataId(domainMetadataId); - boolean exists = erAttributes.stream().anyMatch(item -> StrUtil.equals(item.getAttrName(), erAttribute.getAttrName()) - && StrUtil.equals(item.getErLabel(), erAttribute.getErLabel())); + boolean exists = erAttributes.stream().anyMatch(item -> + StrUtil.equals(item.getAttrName(), erAttribute.getAttrName()) + && StrUtil.equals(item.getErLabel(), erAttribute.getErLabel()) + && StrUtil.equals(item.getErType(), erAttribute.getErType())); if (exists){ log.info("属性已存在,{},不进行保存...", erAttribute.getAttrName()); return; diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java index 82ba45b..84adbc0 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java @@ -204,18 +204,22 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { } log.info("开始意图元数据识别,切分文档id:{}", truncateDTO.getId()); interval.start("makeOutDomainMetadata"); - List domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intents); - log.info("意图元数据识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(),interval.intervalMs("makeOutDomainMetadata")); - // 保存意图数据 + List> intentSplit = CollUtil.split(intents, 10); + log.info("切分意图列表,切分前数据总数:{},切分出:{}组数据", intents.size(), intentSplit.size()); + for (List intentList : intentSplit) { + // 每10个意图进行一次元数据识别 + List domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intentList); + log.info("意图元数据识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(),interval.intervalMs("makeOutDomainMetadata")); + // 保存意图数据 + List intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString()); + for (Intention intention : intentions) { + List metadataDTOS = domainMetadataDTOS.stream() + .filter(d -> StrUtil.equals(d.getIntentDigest(), intention.getDigest())).toList(); + domainMetadataService.batchSaveOrUpdateMetadata(metadataDTOS,intention.getId(), pdfInfo.getDomainCategoryId()); + } + } intentSize ++; index ++; - - List intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString()); - for (Intention intention : intentions) { - List metadataDTOS = domainMetadataDTOS.stream() - .filter(d -> StrUtil.equals(d.getIntentDigest(), intention.getDigest())).toList(); - domainMetadataService.batchSaveOrUpdateMetadata(metadataDTOS,intention.getId(), pdfInfo.getDomainCategoryId()); - } }catch (Exception e){ index ++; log.error("切分文档id:{},意图识别失败", truncateDTO.getId(), e); diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java index 6320697..1b0830c 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java @@ -116,7 +116,12 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { log.info("makeOutDomainMetadata:format:{}", format); String call = aiCallService.call(format); log.info("makeOutDomainMetadata:响应结果:{}", call); - return parseDomainMetadataObj(call); + try { + return parseDomainMetadataObj(call); + } catch (Exception e) { + log.error("makeOutDomainMetadata:解析失败:{}", call); + throw new RuntimeException(e); + } } /** @@ -351,6 +356,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { TruncateDTO truncateDTO = new TruncateDTO(documentDTO, truncateTextBuild.toString()); truncateDTO.setLayoutType(String.valueOf(LayoutTypeEnum.TEXT.getCode()));//强制设置为文本类型 truncateDTOS.add(truncateDTO); + truncateTextBuild = new StringBuilder(INITIAL_BUFFER_SIZE); } // 提前抽取表名 TableTitleDTO tableTitleDTO = this.extractTableTitle(documentDTO.getTitle());