From 87dd27607863eff15519ec7ac9beed35f1a07b56 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Mon, 19 May 2025 17:48:50 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9F=BA=E4=BA=8E=E4=B8=89=E5=85=83=E7=BB=84?= =?UTF-8?q?=E6=8F=90=E5=8F=96=E9=A2=86=E5=9F=9F=E5=85=83=E6=95=B0=E6=8D=AE?= =?UTF-8?q?...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pdfqaserver/domain/ErAttribute.java | 5 - .../pdfqaserver/dto/DomainMetadataDTO.java | 8 +- .../pdfqaserver/dto/ERAttributeDTO.java | 11 ++ .../pdfqaserver/dto/IntentDTO.java | 31 ++++++ .../mapper/DocumentTruncationMapper.java | 3 + .../service/DocumentTruncationService.java | 3 + .../service/DomainMetadataService.java | 6 ++ .../service/ErAttributeService.java | 7 ++ .../pdfqaserver/service/IntentionService.java | 2 + .../service/KnowledgeGraphService.java | 6 ++ .../service/TripleConversionPipeline.java | 2 +- .../impl/DocumentTruncationServiceImpl.java | 5 + .../impl/DomainMetadataServiceImpl.java | 23 ++++ .../service/impl/ErAttributeServiceImpl.java | 24 +++++ .../service/impl/IntentionServiceImpl.java | 5 + .../impl/KnowledgeGraphServiceImpl.java | 102 +++++++++++++----- .../mapper/DocumentTruncationMapper.xml | 6 ++ .../resources/mapper/ErAttributeMapper.xml | 3 +- 18 files changed, 217 insertions(+), 35 deletions(-) diff --git a/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java b/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java index 6158757..26380b6 100644 --- a/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java +++ b/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java @@ -26,11 +26,6 @@ public class ErAttribute implements Serializable { /** * 属性名 */ - private String erName; - - /** - * 属性值类型 - */ private String attrName; /** diff --git a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java index bd981d6..20ea41d 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java @@ -2,6 +2,8 @@ package com.supervision.pdfqaserver.dto; import com.supervision.pdfqaserver.domain.DomainMetadata; import lombok.Data; + +import java.util.ArrayList; import java.util.List; /** @@ -28,21 +30,21 @@ public class DomainMetadataDTO { */ private String sourceType; - private List sourceAttributes; + private List sourceAttributes = new ArrayList<>(); /** * 关系 */ private String relation; - private List relationAttributes; + private List relationAttributes = new ArrayList<>(); /** * 尾节点类型 */ private String targetType; - private List targetAttributes; + private List targetAttributes = new ArrayList<>(); /** * 数据来源:0=手动录入,1=系统自动 diff --git a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java index 936b319..9dd6a30 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java @@ -1,5 +1,6 @@ package com.supervision.pdfqaserver.dto; +import com.supervision.pdfqaserver.domain.ErAttribute; import lombok.Data; @Data @@ -31,4 +32,14 @@ public class ERAttributeDTO { * 节点 1 关系 2 */ private String erType; + + public ErAttribute toErAttribute() { + ErAttribute erAttribute = new ErAttribute(); + erAttribute.setId(this.id); + erAttribute.setDomainMetadataId(this.domainMetadataId); + erAttribute.setAttrName(this.attrName); + erAttribute.setAttrValueType(this.attrValueType); + erAttribute.setErType(this.erType); + return erAttribute; + } } diff --git a/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java index 86a9da6..720d2b2 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java @@ -1,5 +1,6 @@ package com.supervision.pdfqaserver.dto; +import com.supervision.pdfqaserver.domain.Intention; import lombok.Data; /** @@ -8,4 +9,34 @@ import lombok.Data; @Data public class IntentDTO { + private String id; + + /** + * 摘要 + */ + private String digest; + + /** + * 描述详情 + */ + private String desc; + + /** + * 领域分类id + */ + private String domainCategoryId; + + + /** + * 数据来源:0=手动录入,1=系统自动 + */ + private String generationType; + + public IntentDTO(Intention intention){ + this.id = intention.getId(); + this.digest = intention.getDigest(); + this.desc = intention.getDesc(); + this.domainCategoryId = intention.getDomainCategoryId(); + this.generationType = intention.getGenerationType(); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java index 92c228c..67e60be 100644 --- a/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java +++ b/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java @@ -3,6 +3,8 @@ package com.supervision.pdfqaserver.mapper; import com.supervision.pdfqaserver.domain.DocumentTruncation; import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import java.util.List; + /** * @author Administrator * @description 针对表【document_truncation(文档切分表)】的数据库操作Mapper @@ -11,6 +13,7 @@ import com.baomidou.mybatisplus.core.mapper.BaseMapper; */ public interface DocumentTruncationMapper extends BaseMapper { + List listByPdfId(Integer pdfId); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java b/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java index 64f9de8..00435c0 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java @@ -25,4 +25,7 @@ public interface DocumentTruncationService extends IService List queryByDocumentIds(List documentIds); List queryNotERETruncate(List documentIds); + + + List listByPdfId(Integer pdfId); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java index f8938dd..7887d2c 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java @@ -25,6 +25,12 @@ public interface DomainMetadataService extends IService { void saveIfNotExists(DomainMetadata metadata, String domainCategoryId); + /** + * 批量保存或更新领域元数据 + * @param metadatas + * @param intentionId + * @param domainCategoryId + */ void batchSaveOrUpdateMetadata(List metadatas,String intentionId,String domainCategoryId); void completeSave(DomainMetadataDTO domainMetadataDTO); diff --git a/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java b/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java index d8e03b3..98f1547 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java @@ -3,6 +3,8 @@ package com.supervision.pdfqaserver.service; import com.supervision.pdfqaserver.domain.ErAttribute; import com.baomidou.mybatisplus.extension.service.IService; +import java.util.List; + /** * @author Administrator * @description 针对表【er_attribute(实体关系属性表)】的数据库操作Service @@ -10,4 +12,9 @@ import com.baomidou.mybatisplus.extension.service.IService; */ public interface ErAttributeService extends IService { + + void saveIfAbsents(ErAttribute erAttribute, String domainMetadataId); + + List listByDomainMetadataId(String domainMetadataId); + } diff --git a/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java b/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java index 470811c..2c7862f 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java @@ -31,4 +31,6 @@ public interface IntentionService extends IService { * @return */ Intention queryByDigestAndDomainCategoryId(String digest, String domainCategoryId); + + List queryByDomainCategoryId(String domainCategoryId); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java b/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java index eb316fe..25df5bf 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java @@ -30,6 +30,12 @@ public interface KnowledgeGraphService { void generateGraphBaseTrain(Integer pdfId); + /** + * 获取三元组转换管道 + * @param contentType 文档内容类型 + * @param industry 行业 + * @return + */ TripleConversionPipeline getTripleConversionPipeline(String contentType,String industry); void generateGraph(List eredtoList); diff --git a/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java index a8504bf..a9571cf 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java +++ b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java @@ -44,7 +44,7 @@ public interface TripleConversionPipeline { /** - * 识别出truncate的领域元数据 + * 识别出truncate的领域元数据, 训练时使用 * @param truncate 切分文档 * @return DomainMetadataDTO */ diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java index 4d8db68..d9e2736 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java @@ -68,6 +68,11 @@ public class DocumentTruncationServiceImpl extends ServiceImpl queryNotERETruncate(List documentIds) { return null; } + + @Override + public List listByPdfId(Integer pdfId) { + return super.baseMapper.listByPdfId(pdfId); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java index b9041ec..6349d70 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java @@ -5,6 +5,7 @@ import cn.hutool.core.lang.Assert; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.DomainMetadata; import com.supervision.pdfqaserver.dto.DomainMetadataDTO; +import com.supervision.pdfqaserver.dto.ERAttributeDTO; import com.supervision.pdfqaserver.service.DomainMetadataService; import com.supervision.pdfqaserver.mapper.DomainMetadataMapper; import com.supervision.pdfqaserver.service.ErAttributeService; @@ -31,6 +32,7 @@ public class DomainMetadataServiceImpl extends ServiceImpl relationAttributes = metadata.getRelationAttributes(); + if (CollUtil.isNotEmpty(relationAttributes)){ + for (ERAttributeDTO relationAttribute : relationAttributes) { + relationAttribute.setDomainMetadataId(metadata.getId()); + relationAttribute.setErType("2"); + erAttributeService.saveIfAbsents(relationAttribute.toErAttribute(), metadata.getId()); + } + } + // 保存意图和领域元数据的节点属性 + List nodeAttributes = metadata.getSourceAttributes(); + nodeAttributes.addAll(metadata.getTargetAttributes()); + if (CollUtil.isNotEmpty(nodeAttributes)){ + for (ERAttributeDTO nodeAttribute : nodeAttributes) { + nodeAttribute.setDomainMetadataId(metadata.getId()); + nodeAttribute.setErType("1"); + erAttributeService.saveIfAbsents(nodeAttribute.toErAttribute(), metadata.getId()); + } + } } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java index 972303f..4d83236 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java @@ -1,20 +1,44 @@ package com.supervision.pdfqaserver.service.impl; +import cn.hutool.core.lang.Assert; +import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.ErAttribute; import com.supervision.pdfqaserver.service.ErAttributeService; import com.supervision.pdfqaserver.mapper.ErAttributeMapper; +import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; +import java.util.List; + /** * @author Administrator * @description 针对表【er_attribute(实体关系属性表)】的数据库操作Service实现 * @createDate 2025-05-14 15:23:54 */ +@Slf4j @Service public class ErAttributeServiceImpl extends ServiceImpl implements ErAttributeService{ + @Override + public void saveIfAbsents(ErAttribute erAttribute, String domainMetadataId) { + Assert.notEmpty(domainMetadataId, "领域分类id不能为空"); + List erAttributes = this.listByDomainMetadataId(domainMetadataId); + boolean exists = erAttributes.stream().anyMatch(item -> StrUtil.equals(item.getAttrName(), erAttribute.getAttrName()) + && StrUtil.equals(item.getAttrValueType(), erAttribute.getAttrValueType())); + if (exists){ + log.info("属性已存在,{},不进行保存...", erAttribute.getAttrName()); + return; + } + erAttribute.setDomainMetadataId(domainMetadataId); + super.save(erAttribute); + } + + @Override + public List listByDomainMetadataId(String domainMetadataId) { + return super.lambdaQuery().eq(ErAttribute::getDomainMetadataId, domainMetadataId).list(); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java index 60699f9..bf3c69f 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java @@ -60,6 +60,11 @@ public class IntentionServiceImpl extends ServiceImpl queryByDomainCategoryId(String domainCategoryId) { + return super.lambdaQuery().eq(Intention::getDomainCategoryId, domainCategoryId).list(); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java index 6b22fcb..9ed6d52 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java @@ -87,12 +87,18 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId); Assert.notNull(pdfInfo, "pdfId:{}没有找到对应的pdf信息", pdfId); if (null == pdfInfo.getTrainStatus()){ + // todo:训练异常,需要记录异常状态 log.info("pdfId:{}没有找到对应的pdf训练状态,开始识别文档训练状态...", pdfId); pdfInfoService.pdfToGraphStart(pdfId); if (StrUtil.isEmpty(pdfInfo.getContentType())){ log.info("pdfId:{}没有找到对应的pdf内容类型,开始识别文档内容类型...", pdfId); DocumentContentTypeEnum documentContentTypeEnum = tripleConversionPipeline.makeOutPdfContentType(pdfId); log.info("pdfId:{}识别文档内容类型完成,内容类型:{}", pdfId, documentContentTypeEnum.getType()); + if (StrUtil.isEmpty(documentContentTypeEnum.getType())){ + log.info("pdfId:{}没有找到对应的pdf内容类型,停止后续任务...", pdfId); + pdfInfoService.pdfTrainFail(pdfId); + return; + } pdfInfo.setContentType(documentContentTypeEnum.getType()); pdfInfoService.updateContentType(pdfId, documentContentTypeEnum.getType()); } @@ -100,6 +106,11 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { log.info("pdfId:{}没有找到对应的pdf行业,开始识别文档行业...", pdfId); String industry = tripleConversionPipeline.makeOutPdfIndustry(pdfId); log.info("pdfId:{}识别文档行业完成,行业:{}", pdfId, industry); + if (StrUtil.isEmpty(industry)){ + log.info("pdfId:{}没有找到对应的pdf行业,停止后续任务...", pdfId); + pdfInfoService.pdfTrainFail(pdfId); + return; + } pdfInfo.setDomainCategoryId(industry); pdfInfoService.updateCategory(pdfId, industry); } @@ -107,43 +118,86 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { TripleConversionPipeline tripleConversionPipeline = this.getTripleConversionPipeline(pdfInfo.getContentType(), pdfInfo.getDomainCategoryId()); List pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(pdfId); + if (CollUtil.isEmpty(pdfAnalysisOutputs)){ + log.warn("没有找到pdfId为{}的pdf分析结果,不再进行下一步操作...", pdfId); + return; + } List documentIds = pdfAnalysisOutputs.stream().map(p->String.valueOf(p.getId())).collect(Collectors.toList()); List documentTruncations = documentTruncationService.queryByDocumentIds(documentIds); - List truncateDTOS = new ArrayList<>(); if (CollUtil.isNotEmpty(documentTruncations)){ - log.info("没有找到文档切分数据,pdfId:{},不用重置数据...", pdfId); - truncateDTOS = documentTruncations.stream().map(TruncateDTO::new).collect(Collectors.toList()); - } - if (CollUtil.isEmpty(documentTruncations)){ - log.info("开始切割文档切片,pdfId:{}", pdfId); - List documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).collect(Collectors.toList()); - truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList); - log.info("切割文档切片完成,切片个数:{}", truncateDTOS.size()); - // 保存分片信息 - documentTruncationService.batchSave(truncateDTOS); + log.info("文档切分数据不为空,pdfId:{},清除切分数据...", pdfId); + documentTruncationService.deleteByDocumentIds(documentIds); } + log.info("开始切割文档切片,pdfId:{}", pdfId); + List documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).collect(Collectors.toList()); + List truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList); + log.info("切割文档切片完成,切片个数:{}", truncateDTOS.size()); + // 保存分片信息 + documentTruncationService.batchSave(truncateDTOS); + for (TruncateDTO truncateDTO : truncateDTOS) { - List intents = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO); - List domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intents); - // 保存意图数据 - List intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString()); - - for (Intention intention : intentions) { - List metadataDTOS = domainMetadataDTOS.stream() - .filter(d -> StrUtil.equals(d.getIntentDigest(), intention.getDigest())).toList(); - domainMetadataService.batchSaveOrUpdateMetadata(metadataDTOS,intention.getId(), pdfInfo.getDomainCategoryId()); + try { + List intents = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO); + List domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intents); + // 保存意图数据 + List intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString()); + for (Intention intention : intentions) { + List metadataDTOS = domainMetadataDTOS.stream() + .filter(d -> StrUtil.equals(d.getIntentDigest(), intention.getDigest())).toList(); + domainMetadataService.batchSaveOrUpdateMetadata(metadataDTOS,intention.getId(), pdfInfo.getDomainCategoryId()); + } + }catch (Exception e){ + log.error("切分文档id:{},意图识别失败", truncateDTO.getId(), e); } - } - - - } @Override public void generateGraphBaseTrain(Integer pdfId) { + Assert.notNull(pdfId, "pdfId不能为空"); + PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId); + Assert.notNull(pdfInfo, "pdfId:{}没有找到对应的pdf信息", pdfId); + Assert.isTrue((null !=pdfInfo.getTrainStatus() && pdfInfo.getTrainStatus() == 1), + "pdfId:{}的pdf训练状态:{} 不符合要求", pdfId, pdfInfo.getTrainStatus()); + + List truncateDTOS = documentTruncationService.listByPdfId(pdfId).stream().map(TruncateDTO::new).collect(Collectors.toList()); + TripleConversionPipeline conversionPipeline = this.getTripleConversionPipeline(pdfInfo.getContentType(), pdfInfo.getDomainCategoryId()); + if (CollUtil.isEmpty(truncateDTOS)){ + log.info("没有找到pdfId为{}的文档切分数据,开始切分数据...", pdfId); + List pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(pdfId); + List documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).collect(Collectors.toList()); + truncateDTOS = conversionPipeline.sliceDocuments(documentDTOList); + documentTruncationService.batchSave(truncateDTOS); + log.info("切分数据完成,切分个数:{}", truncateDTOS.size()); + } + log.info("开始命名实体识别,切分文档个数:{}", truncateDTOS.size()); + // 查询当前行业分类下的意图 + List intentionDTOs = intentionService.queryByDomainCategoryId(pdfInfo.getDomainCategoryId()).stream().map(IntentDTO::new).distinct().toList(); + if (CollUtil.isEmpty(intentionDTOs)){ + log.info("没有找到行业分类id为{}的意图数据,不再进行下一步操作...", pdfInfo.getDomainCategoryId()); + return; + } + + for (TruncateDTO truncateDTO : truncateDTOS) { + try { + List intents = conversionPipeline.makeOutTruncationIntent(truncateDTO,intentionDTOs); + if (CollUtil.isEmpty(intents)){ + log.info("切分文档id:{},未正确识别出意图...", truncateDTO.getId()); + continue; + } + EREDTO eredto = conversionPipeline.doEre(truncateDTO, intents); + if (null == eredto){ + log.info("切分文档id:{},命名实体识别结果为空...", truncateDTO.getId()); + continue; + } + // 保存实体关系抽取结果 + this.saveERE(eredto, truncateDTO.getId()); + }catch (Exception e){ + log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e); + } + } } diff --git a/src/main/resources/mapper/DocumentTruncationMapper.xml b/src/main/resources/mapper/DocumentTruncationMapper.xml index d32cbb8..7d2df12 100644 --- a/src/main/resources/mapper/DocumentTruncationMapper.xml +++ b/src/main/resources/mapper/DocumentTruncationMapper.xml @@ -20,4 +20,10 @@ layout_type,content,create_time, update_time + diff --git a/src/main/resources/mapper/ErAttributeMapper.xml b/src/main/resources/mapper/ErAttributeMapper.xml index 32e648d..ea156cd 100644 --- a/src/main/resources/mapper/ErAttributeMapper.xml +++ b/src/main/resources/mapper/ErAttributeMapper.xml @@ -7,7 +7,6 @@ - @@ -16,7 +15,7 @@ - id,domain_metadata_id,er_name, + id,domain_metadata_id, attr_name,attr_value_type,er_type, create_time,update_time