From 307793842e68723073cbc7c946f47c77ec62f0d5 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Fri, 16 May 2025 17:58:06 +0800 Subject: [PATCH] =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=85=83=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=8F=90=E5=8F=96=E4=B8=AD..?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../constant/DocumentContentTypeEnum.java | 35 +++++++ .../domain/ChineseEnglishWords.java | 1 + .../pdfqaserver/domain/ErAttribute.java | 4 - .../pdfqaserver/domain/Intention.java | 10 +- .../pdfqaserver/domain/PdfInfo.java | 2 +- .../pdfqaserver/dto/DomainMetadataDTO.java | 32 ++++++- .../pdfqaserver/dto/ERAttributeDTO.java | 54 ++--------- .../supervision/pdfqaserver/dto/EREDTO.java | 32 ++++--- .../pdfqaserver/dto/EntityExtractionDTO.java | 4 +- .../pdfqaserver/dto/IntentDTO.java | 11 +++ .../dto/RelationExtractionDTO.java | 4 +- .../pdfqaserver/dto/TruncateDTO.java | 17 ++++ .../dto/TruncationERAttributeDTO.java | 68 ++++++++++++++ .../pdfqaserver/service/AiCallService.java | 10 ++ .../service/DomainMetadataService.java | 28 +++++- .../IntentionDomainMetadataService.java | 6 ++ .../pdfqaserver/service/IntentionService.java | 21 +++++ .../service/IntentionTruncationService.java | 2 + .../service/KnowledgeGraphService.java | 16 ++++ .../pdfqaserver/service/PdfInfoService.java | 16 ++++ .../service/TripleConversionPipeline.java | 57 +++++++++++- .../impl/DomainMetadataServiceImpl.java | 70 ++++++++++++++ .../IntentionDomainMetadataServiceImpl.java | 35 +++++++ .../service/impl/IntentionServiceImpl.java | 45 +++++++++ .../impl/IntentionTruncationServiceImpl.java | 14 +++ .../impl/KnowledgeGraphServiceImpl.java | 92 +++++++++++++++++-- .../service/impl/OllamaCallServiceImpl.java | 19 ++++ .../service/impl/PdfInfoServiceImpl.java | 46 ++++++++++ .../impl/TripleConversionPipelineImpl.java | 79 ++++++++++++++-- .../impl/TripleToCypherExecutorImpl.java | 6 +- ...TruncationEntityExtractionServiceImpl.java | 6 +- ...uncationRelationExtractionServiceImpl.java | 4 +- .../resources/mapper/ErAttributeMapper.xml | 3 +- src/main/resources/mapper/IntentionMapper.xml | 3 +- 34 files changed, 749 insertions(+), 103 deletions(-) create mode 100644 src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/TruncationERAttributeDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/AiCallService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java diff --git a/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java b/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java new file mode 100644 index 0000000..3bc5d6f --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java @@ -0,0 +1,35 @@ +package com.supervision.pdfqaserver.constant; + +import lombok.Getter; + +/** + * 文档内容类型 + */ +@Getter +public enum DocumentContentTypeEnum { + + /** + * 研报 + */ + REPORT("0", "研报"), + + /** + * 对话 + */ + DIALOGUE("1", "对话"), + + /** + * 记录 + */ + RECORD("2", "记录"); + + + DocumentContentTypeEnum(String type, String desc) { + this.type = type; + this.desc = desc; + } + + private final String type; + + private final String desc; +} diff --git a/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java b/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java index 6d11b7c..92cb9e3 100644 --- a/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java +++ b/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java @@ -10,6 +10,7 @@ import lombok.Data; * 中英文对照字典 * @TableName chinese_english_words */ +@Deprecated(since = "v_0.0.2") @TableName(value ="chinese_english_words") @Data public class ChineseEnglishWords implements Serializable { diff --git a/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java b/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java index 7cc1da7..6158757 100644 --- a/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java +++ b/src/main/java/com/supervision/pdfqaserver/domain/ErAttribute.java @@ -43,10 +43,6 @@ public class ErAttribute implements Serializable { */ private String erType; - /** - * - */ - private String parentId; /** * 创建时间 diff --git a/src/main/java/com/supervision/pdfqaserver/domain/Intention.java b/src/main/java/com/supervision/pdfqaserver/domain/Intention.java index e469cad..0b14b54 100644 --- a/src/main/java/com/supervision/pdfqaserver/domain/Intention.java +++ b/src/main/java/com/supervision/pdfqaserver/domain/Intention.java @@ -6,8 +6,8 @@ import java.time.LocalDateTime; import lombok.Data; /** - * - * @TableName intention + * 一个digest 和一个 domainCategoryId 组成一个意图 + * @TableName intention 意图表 */ @TableName(value ="intention") @Data @@ -33,6 +33,12 @@ public class Intention implements Serializable { */ private String domainCategoryId; + + /** + * 数据来源:0=手动录入,1=系统自动 + */ + private String generationType; + /** * 创建时间 */ diff --git a/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java b/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java index d669e35..9b26498 100644 --- a/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java +++ b/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java @@ -56,7 +56,7 @@ public class PdfInfo implements Serializable { /** - * 训练状态 0:未训练 1:训练成功 2:训练失败 + * 训练状态 0:开始训练 1:训练成功 2:训练失败 */ private Integer trainStatus; diff --git a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java index bf275f3..bd981d6 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java @@ -1,35 +1,63 @@ package com.supervision.pdfqaserver.dto; +import com.supervision.pdfqaserver.domain.DomainMetadata; import lombok.Data; +import java.util.List; +/** + * 领域元数据DTO + */ @Data public class DomainMetadataDTO { private String id; /** - * 领域类型 + * 意图摘要 */ - private String domainType; + private String intentDigest; + + + /** + * 领域分类id + */ + private String domainCategoryId; /** * 头节点类型 */ private String sourceType; + private List sourceAttributes; + /** * 关系 */ private String relation; + private List relationAttributes; + /** * 尾节点类型 */ private String targetType; + private List targetAttributes; + /** * 数据来源:0=手动录入,1=系统自动 */ private String generationType; + public DomainMetadata toDomainMetadata() { + DomainMetadata domainMetadata = new DomainMetadata(); + domainMetadata.setId(this.id); + domainMetadata.setSourceType(this.sourceType); + domainMetadata.setTargetType(this.targetType); + domainMetadata.setRelation(this.relation); + domainMetadata.setDomainCategoryId(this.domainCategoryId); + domainMetadata.setGenerationType(this.generationType); + return domainMetadata; + } + } diff --git a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java index e90f037..936b319 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java @@ -1,68 +1,34 @@ package com.supervision.pdfqaserver.dto; -import com.supervision.pdfqaserver.domain.TruncationErAttribute; import lombok.Data; -/** - * 实体属性 - */ @Data public class ERAttributeDTO { private String id; /** - * 片段实体属性表 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id + * 领域分类id */ - private String terId; + private String domainMetadataId; /** - * 类型 0:terId关联的id为实体 1:terId关联的id为关系 + * 属性名 */ - private String associationType; + private String erName; /** - * 属性名 + * 属性值类型 */ - private String attribute; - - private String attributeEn; + private String attrName; /** - * 属性值 + * 属性值类型 */ - private String value; + private String attrValueType; /** - * 数据类型 0:字符串 1:数字 + * 节点 1 关系 2 */ - private String dataType; - - public ERAttributeDTO() { - } - - public ERAttributeDTO(TruncationErAttribute truncationErAttribute) { - this.id = truncationErAttribute.getId(); - this.terId = truncationErAttribute.getTerId(); - this.associationType = truncationErAttribute.getAssociationType(); - this.attribute = truncationErAttribute.getAttribute(); - this.value = truncationErAttribute.getValue(); - this.dataType = truncationErAttribute.getDataType(); - } - - public ERAttributeDTO(String attribute, String value, String dataType) { - this.attribute = attribute; - this.value = value; - this.dataType = dataType; - } - - public TruncationErAttribute toTruncationErAttribute() { - TruncationErAttribute truncationErAttribute = new TruncationErAttribute(); - truncationErAttribute.setTerId(this.terId); - truncationErAttribute.setAssociationType(this.associationType); - truncationErAttribute.setAttribute(this.attribute); - truncationErAttribute.setValue(this.value); - truncationErAttribute.setDataType(this.dataType); - return truncationErAttribute; - } + private String erType; } diff --git a/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java index 3707b85..f54f67b 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java @@ -43,16 +43,17 @@ public class EREDTO { name = StrUtil.trim(name); type = StrUtil.trim(type); JSONObject attributes = nodeJson.getJSONObject("attributes"); - List erAttributeDTOS = new ArrayList<>(); + List truncationErAttributeDTOS = new ArrayList<>(); if (CollUtil.isNotEmpty(attributes)){ for (String key : attributes.keySet()) { Object value = attributes.get(key); String valueString = attributes.getString(key); - ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); - erAttributeDTOS.add(erAttributeDTO); + TruncationERAttributeDTO truncationErAttributeDTO = new TruncationERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); + truncationErAttributeDTO.setAssociationType("0"); + truncationErAttributeDTOS.add(truncationErAttributeDTO); } } - EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,type,name, erAttributeDTOS); + EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,type,name, truncationErAttributeDTOS); entities.add(entityExtraction); } } @@ -63,7 +64,7 @@ public class EREDTO { String target = relationJson.getString("target"); String type = relationJson.getString("type"); JSONObject attributes = relationJson.getJSONObject("attributes"); - List erAttributeDTOS = new ArrayList<>(); + List truncationErAttributeDTOS = new ArrayList<>(); if (CollUtil.isNotEmpty(attributes)){ for (String key : attributes.keySet()) { if (StrUtil.isBlank(key)){ @@ -77,8 +78,9 @@ public class EREDTO { value = StrUtil.trim((String) value); } String valueString = attributes.getString(key); - ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); - erAttributeDTOS.add(erAttributeDTO); + TruncationERAttributeDTO truncationErAttributeDTO = new TruncationERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); + truncationErAttributeDTO.setAssociationType("1"); + truncationErAttributeDTOS.add(truncationErAttributeDTO); } } if (StrUtil.isBlank(source) || StrUtil.isBlank(target)){ @@ -98,7 +100,7 @@ public class EREDTO { continue; } RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncationId,source, - sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), erAttributeDTOS); + sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), truncationErAttributeDTOS); relationsList.add(relationExtractionDTO); } } @@ -127,7 +129,7 @@ public class EREDTO { // 避免表格行名重复 entityExtractionDTO.setName("行-" + RandomUtil.randomString(UUID.randomUUID().toString(), 10)); entityExtractionDTO.setTruncationId(truncationId); - List erAttributeDTOS = new ArrayList<>(); + List truncationErAttributeDTOS = new ArrayList<>(); for (Map.Entry tableEntry : tableJson.entrySet()) { String key = tableEntry.getKey(); if (StrUtil.isBlank(key)){ @@ -141,10 +143,10 @@ public class EREDTO { } value = StrUtil.trim((String) value); } - ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, value.toString(), value instanceof Number ? "1" : "0"); - erAttributeDTOS.add(erAttributeDTO); + TruncationERAttributeDTO truncationErAttributeDTO = new TruncationERAttributeDTO(key, value.toString(), value instanceof Number ? "1" : "0"); + truncationErAttributeDTOS.add(truncationErAttributeDTO); } - entityExtractionDTO.setAttributes(erAttributeDTOS); + entityExtractionDTO.setAttributes(truncationErAttributeDTOS); entities.add(entityExtractionDTO); } eredto.setEntities(entities); @@ -161,7 +163,7 @@ public class EREDTO { first.ifPresent(chineseEnglishWords -> entity.setEntityEn(chineseEnglishWords.getEnglishWord())); if (CollUtil.isNotEmpty(entity.getAttributes())){ - for (ERAttributeDTO attribute : entity.getAttributes()) { + for (TruncationERAttributeDTO attribute : entity.getAttributes()) { setAttributeEn(attribute, wordsList); } } @@ -180,7 +182,7 @@ public class EREDTO { targetTypeFirst.ifPresent(chineseEnglishWords -> relation.setTargetTypeEn(chineseEnglishWords.getEnglishWord())); if (CollUtil.isNotEmpty(relation.getAttributes())){ - for (ERAttributeDTO attribute : relation.getAttributes()) { + for (TruncationERAttributeDTO attribute : relation.getAttributes()) { setAttributeEn(attribute, wordsList); } } @@ -188,7 +190,7 @@ public class EREDTO { } - private void setAttributeEn(ERAttributeDTO attribute,List wordsList) { + private void setAttributeEn(TruncationERAttributeDTO attribute, List wordsList) { if (null == attribute || CollUtil.isEmpty(wordsList)){ return; } diff --git a/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java index ca97a98..71cdc10 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java @@ -31,7 +31,7 @@ public class EntityExtractionDTO { */ private String name; - private List attributes = new ArrayList<>(); + private List attributes = new ArrayList<>(); public EntityExtractionDTO() { } @@ -43,7 +43,7 @@ public class EntityExtractionDTO { this.name = entityExtraction.getName(); } - public EntityExtractionDTO(String truncationId, String entity, String name, List attributes) { + public EntityExtractionDTO(String truncationId, String entity, String name, List attributes) { this.truncationId = truncationId; this.entity = entity; this.name = name; diff --git a/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java new file mode 100644 index 0000000..86a9da6 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java @@ -0,0 +1,11 @@ +package com.supervision.pdfqaserver.dto; + +import lombok.Data; + +/** + * 意图实体 + */ +@Data +public class IntentDTO { + +} diff --git a/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java index 212f51e..b446a9b 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java @@ -46,7 +46,7 @@ public class RelationExtractionDTO { private String targetTypeEn; - private List attributes; + private List attributes; public RelationExtractionDTO() { } @@ -61,7 +61,7 @@ public class RelationExtractionDTO { this.targetType = relationExtraction.getTargetType(); } - public RelationExtractionDTO(String truncationId, String source, String sourceType, String relation, String target, String targetType, List attributes) { + public RelationExtractionDTO(String truncationId, String source, String sourceType, String relation, String target, String targetType, List attributes) { this.truncationId = truncationId; this.source = source; this.relation = relation; diff --git a/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java index 1b0ed70..ce5605e 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java @@ -51,6 +51,23 @@ public class TruncateDTO { this.content = documentDTO.getContent(); } + public TruncateDTO(DocumentDTO documentDTO,String content) { + this.documentId = Integer.parseInt(documentDTO.getId()); + this.sectionId = documentDTO.getSectionId(); + this.layoutType = documentDTO.getLayoutType().toString(); + this.title = documentDTO.getTitle(); + this.content = content; + } + + public TruncateDTO(DocumentTruncation documentTruncation) { + this.id = documentTruncation.getId(); + this.documentId = documentTruncation.getDocumentId(); + this.sectionId = documentTruncation.getSectionId(); + this.layoutType = documentTruncation.getLayoutType(); + this.title = documentTruncation.getTitle(); + this.content = documentTruncation.getContent(); + } + public DocumentTruncation toDocumentTruncation() { DocumentTruncation truncation = new DocumentTruncation(); truncation.setDocumentId(this.documentId); diff --git a/src/main/java/com/supervision/pdfqaserver/dto/TruncationERAttributeDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/TruncationERAttributeDTO.java new file mode 100644 index 0000000..8efcf62 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/dto/TruncationERAttributeDTO.java @@ -0,0 +1,68 @@ +package com.supervision.pdfqaserver.dto; + +import com.supervision.pdfqaserver.domain.TruncationErAttribute; +import lombok.Data; + +/** + * 实体属性 + */ +@Data +public class TruncationERAttributeDTO { + + private String id; + + /** + * 片段实体属性表 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id + */ + private String terId; + + /** + * 类型 0:terId关联的id为实体 1:terId关联的id为关系 + */ + private String associationType; + + /** + * 属性名 + */ + private String attribute; + + private String attributeEn; + + /** + * 属性值 + */ + private String value; + + /** + * 数据类型 0:字符串 1:数字 + */ + private String dataType; + + public TruncationERAttributeDTO() { + } + + public TruncationERAttributeDTO(TruncationErAttribute truncationErAttribute) { + this.id = truncationErAttribute.getId(); + this.terId = truncationErAttribute.getTerId(); + this.associationType = truncationErAttribute.getAssociationType(); + this.attribute = truncationErAttribute.getAttribute(); + this.value = truncationErAttribute.getValue(); + this.dataType = truncationErAttribute.getDataType(); + } + + public TruncationERAttributeDTO(String attribute, String value, String dataType) { + this.attribute = attribute; + this.value = value; + this.dataType = dataType; + } + + public TruncationErAttribute toTruncationErAttribute() { + TruncationErAttribute truncationErAttribute = new TruncationErAttribute(); + truncationErAttribute.setTerId(this.terId); + truncationErAttribute.setAssociationType(this.associationType); + truncationErAttribute.setAttribute(this.attribute); + truncationErAttribute.setValue(this.value); + truncationErAttribute.setDataType(this.dataType); + return truncationErAttribute; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/AiCallService.java b/src/main/java/com/supervision/pdfqaserver/service/AiCallService.java new file mode 100644 index 0000000..6dccb0d --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/AiCallService.java @@ -0,0 +1,10 @@ +package com.supervision.pdfqaserver.service; + +/** + * @description: AI调用服务 + */ +public interface AiCallService { + + + String call(String prompt); +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java index 59dc4cb..f8938dd 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java @@ -2,6 +2,8 @@ package com.supervision.pdfqaserver.service; import com.supervision.pdfqaserver.domain.DomainMetadata; import com.baomidou.mybatisplus.extension.service.IService; +import com.supervision.pdfqaserver.dto.DomainMetadataDTO; +import java.util.List; /** * @author Administrator @@ -11,5 +13,29 @@ import com.baomidou.mybatisplus.extension.service.IService; public interface DomainMetadataService extends IService { - void saveIfNotExists(DomainMetadata metadata); + /** + * 由于领域元数据表中加入了行业分类ID,所以该方法已不再使用 + * 建议使用 saveIfNotExists(DomainMetadata metadata,String domainCategoryId) + * @param metadata + */ + @Deprecated(since = "v_0.0.2") + void saveIfNotExists(DomainMetadata metadata); + + + void saveIfNotExists(DomainMetadata metadata, String domainCategoryId); + + + void batchSaveOrUpdateMetadata(List metadatas,String intentionId,String domainCategoryId); + + void completeSave(DomainMetadataDTO domainMetadataDTO); + + /** + * 根据主键查询数据 + * @param sourceType 源类型 + * @param targetType 目标类型 + * @param relation 关系 + * @param domainCategoryId 行业分类ID + * @return + */ + DomainMetadata getByPrimaryKey(String sourceType, String targetType, String relation,String domainCategoryId); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java b/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java index fc3e9f9..0f447e3 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java @@ -3,6 +3,8 @@ package com.supervision.pdfqaserver.service; import com.supervision.pdfqaserver.domain.IntentionDomainMetadata; import com.baomidou.mybatisplus.extension.service.IService; +import java.util.List; + /** * @author Administrator * @description 针对表【intention_domain_metadata】的数据库操作Service @@ -10,4 +12,8 @@ import com.baomidou.mybatisplus.extension.service.IService; */ public interface IntentionDomainMetadataService extends IService { + + List listByIntentionId(String intentionId); + + void batchSaveIfAbsent(String intentionId, List metadataIds); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java b/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java index d4c743a..470811c 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/IntentionService.java @@ -1,8 +1,11 @@ package com.supervision.pdfqaserver.service; +import cn.hutool.core.util.StrUtil; import com.supervision.pdfqaserver.domain.Intention; import com.baomidou.mybatisplus.extension.service.IService; +import java.util.List; + /** * @author Administrator * @description 针对表【intention】的数据库操作Service @@ -10,4 +13,22 @@ import com.baomidou.mybatisplus.extension.service.IService; */ public interface IntentionService extends IService { + + /** + * 保存意图 + * @param intents 意图 + * @param domainCategoryId 行业分类ID + *@param truncateId 分片id + * @return + */ + List batchSaveIfAbsent(List intents, String domainCategoryId, String truncateId); + + /** + * 查询意图 + * digest 意图 + 行业分类ID domainCategoryId = 唯一 + * @param digest 意图 + * @param domainCategoryId 行业分类ID + * @return + */ + Intention queryByDigestAndDomainCategoryId(String digest, String domainCategoryId); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/IntentionTruncationService.java b/src/main/java/com/supervision/pdfqaserver/service/IntentionTruncationService.java index fe84a6b..4bdb4fa 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/IntentionTruncationService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/IntentionTruncationService.java @@ -10,4 +10,6 @@ import com.baomidou.mybatisplus.extension.service.IService; */ public interface IntentionTruncationService extends IService { + + void saveIfAbsent(String intentionId, String truncateId); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java b/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java index ff52e05..eb316fe 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java @@ -16,6 +16,22 @@ public interface KnowledgeGraphService { */ void generateGraph(String pdfId); + /** + * 元数据训练 + * @param pdfId pdfId + */ + void metaDataTrain(Integer pdfId); + + + /** + * 基于训练结果生成知识图谱 + * @param pdfId pdfId + */ + void generateGraphBaseTrain(Integer pdfId); + + + TripleConversionPipeline getTripleConversionPipeline(String contentType,String industry); + void generateGraph(List eredtoList); List truncateERE(List truncateDTOS); diff --git a/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java b/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java index abbeb9b..a19d4e2 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java @@ -19,4 +19,20 @@ public interface PdfInfoService extends IService { void pdfToGraphFail(Integer pdfId); List listNeedGenerateGraph(Integer limit); + + PdfInfo getByPdfId(Integer pdfId); + + + void updateContentType(Integer pdfId, String contentType); + + + void updateCategory(Integer pdfId, String category); + + + void pdfTrainStart(Integer pdfId); + + void pdfTrainComplete(Integer pdfId); + + void pdfTrainFail(Integer pdfId); + } diff --git a/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java index 60a5510..a8504bf 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java +++ b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java @@ -1,10 +1,7 @@ package com.supervision.pdfqaserver.service; -import com.supervision.pdfqaserver.dto.EREDTO; -import com.supervision.pdfqaserver.dto.DocumentDTO; -import com.supervision.pdfqaserver.dto.TableTitleDTO; -import com.supervision.pdfqaserver.dto.TruncateDTO; - +import com.supervision.pdfqaserver.constant.DocumentContentTypeEnum; +import com.supervision.pdfqaserver.dto.*; import java.util.List; /** @@ -12,6 +9,56 @@ import java.util.List; */ public interface TripleConversionPipeline { + + /** + * 识别出PDF文档类型 + * @param pdfId 文档ID + * @return DocumentContentTypeEnum 文档类型 + */ + DocumentContentTypeEnum makeOutPdfContentType(Integer pdfId); + + + /** + * 识别出PDF文档行业 + * @param pdfId 文档ID + * @return 行业 + */ + String makeOutPdfIndustry(Integer pdfId); + + + /** + * 识别出truncate的意图,训练过程中使用 + * @param truncate 切分文档 + * @return String + */ + List makeOutTruncationIntent(TruncateDTO truncate); + + /** + * 识别出truncate的意图,非训练过程中使用 + * @param truncate 切分文档 + * @param intents 意图列表 + * @return + */ + + List makeOutTruncationIntent(TruncateDTO truncate, List intents); + + + /** + * 识别出truncate的领域元数据 + * @param truncate 切分文档 + * @return DomainMetadataDTO + */ + List makeOutDomainMetadata(TruncateDTO truncate,List intents); + + + /** + * 实体关系抽取 + * @param truncateDTO 切分文档 + * @param intents 意图 + * @return + */ + EREDTO doEre(TruncateDTO truncateDTO,List intents); + /** * 切分文档 * @param documents 文档列表 diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java index 2f4219b..b9041ec 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java @@ -1,20 +1,36 @@ package com.supervision.pdfqaserver.service.impl; +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.lang.Assert; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.DomainMetadata; +import com.supervision.pdfqaserver.dto.DomainMetadataDTO; import com.supervision.pdfqaserver.service.DomainMetadataService; import com.supervision.pdfqaserver.mapper.DomainMetadataMapper; +import com.supervision.pdfqaserver.service.ErAttributeService; +import com.supervision.pdfqaserver.service.IntentionDomainMetadataService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.util.List; /** * @author Administrator * @description 针对表【domain_metadata(领域元数据)】的数据库操作Service实现 * @createDate 2025-04-27 11:45:24 */ +@Slf4j @Service +@RequiredArgsConstructor public class DomainMetadataServiceImpl extends ServiceImpl implements DomainMetadataService{ + + private final IntentionDomainMetadataService intentionDomainMetadataService; + + private final ErAttributeService erAttributeService; @Override public void saveIfNotExists(DomainMetadata metadata) { @@ -26,6 +42,60 @@ public class DomainMetadataServiceImpl extends ServiceImpl metadatas,String intentionId,String domainCategoryId) { + if (CollUtil.isEmpty(metadatas)){ + return; + } + Assert.notEmpty(domainCategoryId, "行业分类ID不能为空"); + Assert.notEmpty(intentionId, "意图ID不能为空"); + + for (DomainMetadataDTO metadata : metadatas) { + DomainMetadata data = this.getByPrimaryKey(metadata.getSourceType(), metadata.getTargetType(), metadata.getRelation(), domainCategoryId); + if (null != data){ + metadata.setId(data.getId()); + }else { + DomainMetadata domainMetadata = metadata.toDomainMetadata(); + this.saveIfNotExists(domainMetadata, domainCategoryId); + metadata.setId(domainMetadata.getId()); + } + } + } + + @Override + @Transactional(rollbackFor = Exception.class) + public void completeSave(DomainMetadataDTO domainMetadataDTO) { + /* if (CollUtil.isEmpty(domainMetadataDTOS)){ + return; + } + for (DomainMetadataDTO domainMetadataDTO : domainMetadataDTOS) { + DomainMetadata domainMetadata = domainMetadataDTO.toDomainMetadata(); + this.saveIfNotExists(domainMetadata); + // 保存属性信息 + }*/ + } + + @Override + public DomainMetadata getByPrimaryKey(String sourceType, String targetType, String relation, String domainCategoryId) { + return this.lambdaQuery() + .eq(DomainMetadata::getSourceType, sourceType) + .eq(DomainMetadata::getTargetType, targetType) + .eq(DomainMetadata::getRelation, relation) + .eq(DomainMetadata::getDomainCategoryId, domainCategoryId) + .one(); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java index 89988a6..a221352 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java @@ -1,10 +1,16 @@ package com.supervision.pdfqaserver.service.impl; +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.lang.Assert; +import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.IntentionDomainMetadata; import com.supervision.pdfqaserver.service.IntentionDomainMetadataService; import com.supervision.pdfqaserver.mapper.IntentionDomainMetadataMapper; import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import java.util.ArrayList; +import java.util.List; /** * @author Administrator @@ -15,6 +21,35 @@ import org.springframework.stereotype.Service; public class IntentionDomainMetadataServiceImpl extends ServiceImpl implements IntentionDomainMetadataService{ + @Override + public List listByIntentionId(String intentionId) { + if (StrUtil.isEmpty(intentionId)){ + return new ArrayList<>(); + } + return this.lambdaQuery().eq(IntentionDomainMetadata::getIntentionId, intentionId).list(); + } + + @Override + @Transactional(rollbackFor = Exception.class) + public void batchSaveIfAbsent(String intentionId, List metadataIds) { + if (CollUtil.isEmpty(metadataIds)){ + return; + } + Assert.notEmpty(intentionId, "意图ID不能为空"); + + List intentMetadataList = this.listByIntentionId(intentionId); + List list = metadataIds.stream().filter(metadataId -> intentMetadataList.stream().noneMatch(item -> item.getDomainMetadataId().equals(metadataId))).toList(); + if (CollUtil.isNotEmpty(list)){ + List doms = new ArrayList<>(); + for (String metadataId : list) { + IntentionDomainMetadata idm = new IntentionDomainMetadata(); + idm.setIntentionId(intentionId); + idm.setDomainMetadataId(metadataId); + doms.add(idm); + } + this.saveBatch(doms); + } + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java index ac6d5b9..60699f9 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java @@ -1,20 +1,65 @@ package com.supervision.pdfqaserver.service.impl; +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.lang.Assert; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.Intention; import com.supervision.pdfqaserver.service.IntentionService; import com.supervision.pdfqaserver.mapper.IntentionMapper; +import com.supervision.pdfqaserver.service.IntentionTruncationService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; +import java.util.ArrayList; +import java.util.List; /** * @author Administrator * @description 针对表【intention】的数据库操作Service实现 * @createDate 2025-05-14 15:23:54 */ +@Slf4j @Service +@RequiredArgsConstructor public class IntentionServiceImpl extends ServiceImpl implements IntentionService{ + private final IntentionTruncationService intentionTruncationService; + @Override + public List batchSaveIfAbsent(List intents, String domainCategoryId,String truncateId) { + Assert.notEmpty(domainCategoryId, "行业分类ID不能为空"); + if (CollUtil.isEmpty(intents)){ + return new ArrayList<>(); + } + List intentions = this.lambdaQuery().in(Intention::getDigest, intents) + .eq(Intention::getDomainCategoryId, domainCategoryId).list(); + + List result = new ArrayList<>(); + for (String intent : intents) { + Intention one = intentions.stream().filter(i -> i.getDigest().equals(intent)).findFirst().orElse(null); + if (null == one){ + Intention intention = new Intention(); + intention.setDigest(intent); + intention.setDomainCategoryId(domainCategoryId); + intention.setGenerationType("1"); + this.save(intention); + result.add(intention); + + // 保存关联关系 + intentionTruncationService.saveIfAbsent(intention.getId(), truncateId); + }else { + result.add(one); + intentionTruncationService.saveIfAbsent(one.getId(), truncateId); + } + } + return result; + } + + @Override + public Intention queryByDigestAndDomainCategoryId(String digest, String domainCategoryId) { + return this.lambdaQuery().eq(Intention::getDigest, digest) + .eq(Intention::getDomainCategoryId, domainCategoryId).one(); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionTruncationServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionTruncationServiceImpl.java index bd00860..ac07eae 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionTruncationServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionTruncationServiceImpl.java @@ -1,5 +1,6 @@ package com.supervision.pdfqaserver.service.impl; +import cn.hutool.core.lang.Assert; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.IntentionTruncation; import com.supervision.pdfqaserver.service.IntentionTruncationService; @@ -15,6 +16,19 @@ import org.springframework.stereotype.Service; public class IntentionTruncationServiceImpl extends ServiceImpl implements IntentionTruncationService{ + @Override + public void saveIfAbsent(String intentionId, String truncateId) { + Assert.notEmpty(intentionId, "意图ID不能为空"); + Assert.notEmpty(truncateId, "分片ID不能为空"); + IntentionTruncation intentionTruncation = this.lambdaQuery().eq(IntentionTruncation::getIntentionId, intentionId) + .eq(IntentionTruncation::getTruncateId, truncateId).one(); + if (null == intentionTruncation){ + IntentionTruncation truncation = new IntentionTruncation(); + truncation.setIntentionId(intentionId); + truncation.setTruncateId(truncateId); + this.save(truncation); + } + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java index d5bd51e..6b22fcb 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java @@ -2,14 +2,15 @@ package com.supervision.pdfqaserver.service.impl; import cn.hutool.core.collection.CollUtil; import cn.hutool.core.date.TimeInterval; +import cn.hutool.core.lang.Assert; import cn.hutool.core.util.NumberUtil; import cn.hutool.core.util.StrUtil; import cn.hutool.json.JSONUtil; +import com.supervision.pdfqaserver.constant.DocumentContentTypeEnum; import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum; import com.supervision.pdfqaserver.domain.*; import com.supervision.pdfqaserver.dto.*; import com.supervision.pdfqaserver.service.*; -import com.supervision.pdfqaserver.thread.KnowledgeGraphGenerateTreadPool; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.aop.framework.AopContext; @@ -46,6 +47,10 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator; + private final PdfInfoService pdfInfoService; + + private final IntentionService intentionService; + @Override public void generateGraph(String pdfId) { @@ -76,6 +81,79 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { } + @Override + public void metaDataTrain(Integer pdfId) { + Assert.notNull(pdfId, "pdfId不能为空"); + PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId); + Assert.notNull(pdfInfo, "pdfId:{}没有找到对应的pdf信息", pdfId); + if (null == pdfInfo.getTrainStatus()){ + log.info("pdfId:{}没有找到对应的pdf训练状态,开始识别文档训练状态...", pdfId); + pdfInfoService.pdfToGraphStart(pdfId); + if (StrUtil.isEmpty(pdfInfo.getContentType())){ + log.info("pdfId:{}没有找到对应的pdf内容类型,开始识别文档内容类型...", pdfId); + DocumentContentTypeEnum documentContentTypeEnum = tripleConversionPipeline.makeOutPdfContentType(pdfId); + log.info("pdfId:{}识别文档内容类型完成,内容类型:{}", pdfId, documentContentTypeEnum.getType()); + pdfInfo.setContentType(documentContentTypeEnum.getType()); + pdfInfoService.updateContentType(pdfId, documentContentTypeEnum.getType()); + } + if (StrUtil.isEmpty(pdfInfo.getDomainCategoryId())){ + log.info("pdfId:{}没有找到对应的pdf行业,开始识别文档行业...", pdfId); + String industry = tripleConversionPipeline.makeOutPdfIndustry(pdfId); + log.info("pdfId:{}识别文档行业完成,行业:{}", pdfId, industry); + pdfInfo.setDomainCategoryId(industry); + pdfInfoService.updateCategory(pdfId, industry); + } + } + TripleConversionPipeline tripleConversionPipeline = this.getTripleConversionPipeline(pdfInfo.getContentType(), pdfInfo.getDomainCategoryId()); + + List pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(pdfId); + List documentIds = pdfAnalysisOutputs.stream().map(p->String.valueOf(p.getId())).collect(Collectors.toList()); + List documentTruncations = documentTruncationService.queryByDocumentIds(documentIds); + List truncateDTOS = new ArrayList<>(); + if (CollUtil.isNotEmpty(documentTruncations)){ + log.info("没有找到文档切分数据,pdfId:{},不用重置数据...", pdfId); + truncateDTOS = documentTruncations.stream().map(TruncateDTO::new).collect(Collectors.toList()); + } + if (CollUtil.isEmpty(documentTruncations)){ + log.info("开始切割文档切片,pdfId:{}", pdfId); + List documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).collect(Collectors.toList()); + truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList); + log.info("切割文档切片完成,切片个数:{}", truncateDTOS.size()); + // 保存分片信息 + documentTruncationService.batchSave(truncateDTOS); + } + for (TruncateDTO truncateDTO : truncateDTOS) { + List intents = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO); + List domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intents); + // 保存意图数据 + List intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString()); + + for (Intention intention : intentions) { + List metadataDTOS = domainMetadataDTOS.stream() + .filter(d -> StrUtil.equals(d.getIntentDigest(), intention.getDigest())).toList(); + domainMetadataService.batchSaveOrUpdateMetadata(metadataDTOS,intention.getId(), pdfInfo.getDomainCategoryId()); + } + + + } + + + + + } + + @Override + public void generateGraphBaseTrain(Integer pdfId) { + + } + + @Override + public TripleConversionPipeline getTripleConversionPipeline(String contentType, String industry) { + // 内容类型决定了文本片段的切分方式,行业类别决定了文本片段的意图 + // 内容类型和行业类型确定tripleConversionPipeline的具体实现方式,现在默认是pdf类型 + return this.tripleConversionPipeline; + } + @Override public void generateGraph(List eredtoList) { log.info("开始合并实体关系抽取结果..."); @@ -108,7 +186,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { for (EntityExtractionDTO entityDTO : entities) { saveWordsIfNecessary(entityDTO.getEntity(), allWords); if (CollUtil.isNotEmpty(entityDTO.getAttributes())){ - for (ERAttributeDTO attribute : entityDTO.getAttributes()) { + for (TruncationERAttributeDTO attribute : entityDTO.getAttributes()) { saveWordsIfNecessary(attribute.getAttribute(), allWords); } } @@ -119,7 +197,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { for (RelationExtractionDTO relationDTO : relations) { saveWordsIfNecessary(relationDTO.getRelation(), allWords); if (CollUtil.isNotEmpty(relationDTO.getAttributes())){ - for (ERAttributeDTO attribute : relationDTO.getAttributes()) { + for (TruncationERAttributeDTO attribute : relationDTO.getAttributes()) { saveWordsIfNecessary(attribute.getAttribute(), allWords); } } @@ -294,8 +372,8 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { for (TruncationEntityExtraction entityExtraction : truncationEntityExtractions) { EREDTO eredto = new EREDTO(); EntityExtractionDTO extractionDTO = new EntityExtractionDTO(entityExtraction); - List attributes = truncationErAttributes.stream() - .filter(t -> StrUtil.equals(entityExtraction.getId(), t.getTerId())).map(ERAttributeDTO::new).collect(Collectors.toList()); + List attributes = truncationErAttributes.stream() + .filter(t -> StrUtil.equals(entityExtraction.getId(), t.getTerId())).map(TruncationERAttributeDTO::new).collect(Collectors.toList()); extractionDTO.setAttributes(attributes); eredto.getEntities().add(extractionDTO); eres.add(eredto); @@ -303,8 +381,8 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { for (TruncationRelationExtraction relationExtraction : truncationRelationExtractions) { EREDTO eredto = new EREDTO(); RelationExtractionDTO extractionDTO = new RelationExtractionDTO(relationExtraction); - List attributes = truncationErAttributes.stream() - .filter(t -> StrUtil.equals(relationExtraction.getId(), t.getTerId())).map(ERAttributeDTO::new).collect(Collectors.toList()); + List attributes = truncationErAttributes.stream() + .filter(t -> StrUtil.equals(relationExtraction.getId(), t.getTerId())).map(TruncationERAttributeDTO::new).collect(Collectors.toList()); extractionDTO.setAttributes(attributes); eredto.getRelations().add(extractionDTO); eres.add(eredto); diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java new file mode 100644 index 0000000..53d5229 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java @@ -0,0 +1,19 @@ +package com.supervision.pdfqaserver.service.impl; + +import com.supervision.pdfqaserver.service.AiCallService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.ollama.OllamaChatModel; +import org.springframework.stereotype.Service; + +@Slf4j +@Service +@RequiredArgsConstructor +public class OllamaCallServiceImpl implements AiCallService { + + private final OllamaChatModel ollamaChatModel; + @Override + public String call(String prompt) { + return null; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java index 41eae00..73dedaa 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java @@ -47,6 +47,52 @@ public class PdfInfoServiceImpl extends ServiceImpl .last("limit " + limit) .list(); } + + @Override + public PdfInfo getByPdfId(Integer pdfId) { + return this.getById(pdfId); + } + + @Override + public void updateContentType(Integer pdfId, String contentType) { + this.lambdaUpdate().eq(PdfInfo::getId, pdfId) + .set(PdfInfo::getContentType, contentType) + .update(); + } + + @Override + public void updateCategory(Integer pdfId, String category) { + this.lambdaUpdate().eq(PdfInfo::getId, pdfId) + .set(PdfInfo::getDomainCategoryId, category) + .update(); + } + + @Override + public void pdfTrainStart(Integer pdfId) { + this.lambdaUpdate().eq(PdfInfo::getId, pdfId) + .set(PdfInfo::getTrainStatus, 0) + .set(PdfInfo::getTrainStartTime, LocalDateTime.now()) + .update(); + } + + @Override + public void pdfTrainComplete(Integer pdfId) { + + this.lambdaUpdate().eq(PdfInfo::getId, pdfId) + .set(PdfInfo::getTrainStatus, 1) + .set(PdfInfo::getTrainEndTime, LocalDateTime.now()) + .update(); + } + + @Override + public void pdfTrainFail(Integer pdfId) { + this.lambdaUpdate().eq(PdfInfo::getId, pdfId) + .set(PdfInfo::getTrainStatus, 2) + .set(PdfInfo::getTrainEndTime, LocalDateTime.now()) + .update(); + } + + } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java index a6bf545..41dbd77 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java @@ -6,6 +6,7 @@ import cn.hutool.core.util.BooleanUtil; import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.StrUtil; import com.supervision.pdfqaserver.cache.PromptCache; +import com.supervision.pdfqaserver.constant.DocumentContentTypeEnum; import com.supervision.pdfqaserver.constant.LayoutTypeEnum; import com.supervision.pdfqaserver.dto.*; import com.supervision.pdfqaserver.service.TripleConversionPipeline; @@ -27,8 +28,50 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { private final OllamaChatModel ollamaChatModel; + @Override + public DocumentContentTypeEnum makeOutPdfContentType(Integer pdfId) { + return null; + } + + @Override + public String makeOutPdfIndustry(Integer pdfId) { + return null; + } + + @Override + public List makeOutTruncationIntent(TruncateDTO truncate) { + return null; + } + + @Override + public List makeOutTruncationIntent(TruncateDTO truncate, List intents) { + return null; + } + + @Override + public List makeOutDomainMetadata(TruncateDTO truncate,List intents) { + return null; + } + + @Override + public EREDTO doEre(TruncateDTO truncateDTO, List intents) { + return null; + } + + /** + * 切分文档 + * 切分规则: + * 文本类型: 以单句为最小单元,最大字数现在这1000字以内。单句超过1000字取完成的单句。 + * 表格类型: 以4行数据为最小单元。 + * @param documents 文档列表 + * @return + */ @Override public List sliceDocuments(List documents) { + + int maxTextLength = 1000; + int minTextLength = 800; + int INITIAL_BUFFER_SIZE = 1500; // 对pdfAnalysisOutputs进行排序 List documentDTOList = documents.stream().sorted( // 先对pageNo进行排序再对layoutOrder进行排序 @@ -45,6 +88,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { // 创建管道 StanfordCoreNLP pipeline = new StanfordCoreNLP(props); List truncateDTOS = new ArrayList<>(); + StringBuilder truncateTextBuild = new StringBuilder(1500); for (DocumentDTO documentDTO : documentDTOList) { String content = documentDTO.getContent(); if (StrUtil.isEmpty(content)){ @@ -58,9 +102,30 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { pipeline.annotate(document); // 获取句子 for (CoreSentence sentence : document.sentences()) { - TruncateDTO truncateDTO = new TruncateDTO(documentDTO); - truncateDTO.setContent(sentence.text()); - truncateDTOS.add(truncateDTO); + if (StrUtil.isEmpty(sentence.text())) { + continue; + } + if (sentence.text().length() >= maxTextLength) { + if (truncateTextBuild.length() >= minTextLength) { + // 提交缓存内容 + truncateDTOS.add(new TruncateDTO(documentDTO, truncateTextBuild.toString())); + truncateTextBuild = new StringBuilder(INITIAL_BUFFER_SIZE); + } + // 提交超长句子 + truncateDTOS.add(new TruncateDTO(documentDTO, sentence.text())); + } else { + if (truncateTextBuild.length() + sentence.text().length() >= minTextLength) { + truncateTextBuild.append(sentence.text()); + truncateDTOS.add(new TruncateDTO(documentDTO, truncateTextBuild.toString())); + truncateTextBuild = new StringBuilder(INITIAL_BUFFER_SIZE); + } else { + truncateTextBuild.append(sentence.text()); + } + } + } + // 处理剩余内容 + if (!truncateTextBuild.isEmpty()) { + truncateDTOS.add(new TruncateDTO(documentDTO, truncateTextBuild.toString())); } } else if (LayoutTypeEnum.TABLE.getCode() == layoutType) { // 如果是表格类型的布局,进行切分 @@ -280,11 +345,11 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { return; } // 合并属性 - List cachedAttributes = cachedRelation.getAttributes(); + List cachedAttributes = cachedRelation.getAttributes(); if (null == cachedAttributes){ cachedAttributes = new ArrayList<>(); } - for (ERAttributeDTO attribute : relation.getAttributes()) { + for (TruncationERAttributeDTO attribute : relation.getAttributes()) { String attributeKey = attribute.getAttribute(); String attributeValue = attribute.getValue(); if (StrUtil.isEmpty(attributeKey) || StrUtil.isEmpty(attributeValue)){ @@ -307,12 +372,12 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { return; } // 合并属性 - List cachedAttributes = cachedEntity.getAttributes(); + List cachedAttributes = cachedEntity.getAttributes(); if (null == cachedAttributes){ cachedAttributes = new ArrayList<>(); cachedEntity.setAttributes(cachedAttributes); } - for (ERAttributeDTO attribute : entity.getAttributes()) { + for (TruncationERAttributeDTO attribute : entity.getAttributes()) { String attributeKey = attribute.getAttribute(); String attributeValue = attribute.getValue(); if (StrUtil.isEmpty(attributeKey) || StrUtil.isEmpty(attributeValue)){ diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java index d897037..b156ecd 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java @@ -5,7 +5,7 @@ import cn.hutool.core.util.StrUtil; import cn.hutool.json.JSONUtil; import com.supervision.pdfqaserver.cache.PromptCache; import com.supervision.pdfqaserver.dao.Neo4jRepository; -import com.supervision.pdfqaserver.dto.ERAttributeDTO; +import com.supervision.pdfqaserver.dto.TruncationERAttributeDTO; import com.supervision.pdfqaserver.dto.EREDTO; import com.supervision.pdfqaserver.dto.EntityExtractionDTO; import com.supervision.pdfqaserver.dto.RelationExtractionDTO; @@ -60,7 +60,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor { continue; } Map attributes = entity.getAttributes().stream().collect(Collectors.toMap( - ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue + TruncationERAttributeDTO::getAttributeEn, TruncationERAttributeDTO::getValue )); attributes.put("truncationId", entity.getTruncationId()); attributes.put("name", entity.getName()); @@ -85,7 +85,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor { continue; } Map attributes = relation.getAttributes().stream().collect(Collectors.toMap( - ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue + TruncationERAttributeDTO::getAttributeEn, TruncationERAttributeDTO::getValue )); attributes.put("sourceType", relation.getSourceType()); attributes.put("truncationId", relation.getTruncationId()); diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java index 7671764..9b51c60 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java @@ -5,7 +5,7 @@ import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.TruncationEntityExtraction; import com.supervision.pdfqaserver.domain.TruncationErAttribute; -import com.supervision.pdfqaserver.dto.ERAttributeDTO; +import com.supervision.pdfqaserver.dto.TruncationERAttributeDTO; import com.supervision.pdfqaserver.dto.EntityExtractionDTO; import com.supervision.pdfqaserver.service.TruncationEntityExtractionService; import com.supervision.pdfqaserver.mapper.TruncationEntityExtractionMapper; @@ -39,11 +39,11 @@ public class TruncationEntityExtractionServiceImpl extends ServiceImpl attributes = entity.getAttributes(); + List attributes = entity.getAttributes(); if (CollUtil.isEmpty(attributes)){ continue; } - for (ERAttributeDTO attribute : attributes) { + for (TruncationERAttributeDTO attribute : attributes) { attribute.setTerId(tee.getId()); TruncationErAttribute era = attribute.toTruncationErAttribute(); truncationErAttributeService.save(era); diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java index 1421a61..fd79a1d 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java @@ -5,7 +5,7 @@ import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.TruncationErAttribute; import com.supervision.pdfqaserver.domain.TruncationRelationExtraction; -import com.supervision.pdfqaserver.dto.ERAttributeDTO; +import com.supervision.pdfqaserver.dto.TruncationERAttributeDTO; import com.supervision.pdfqaserver.dto.RelationExtractionDTO; import com.supervision.pdfqaserver.service.TruncationErAttributeService; import com.supervision.pdfqaserver.service.TruncationRelationExtractionService; @@ -40,7 +40,7 @@ public class TruncationRelationExtractionServiceImpl extends ServiceImpl - @@ -19,6 +18,6 @@ id,domain_metadata_id,er_name, attr_name,attr_value_type,er_type, - parent_id,create_time,update_time + create_time,update_time diff --git a/src/main/resources/mapper/IntentionMapper.xml b/src/main/resources/mapper/IntentionMapper.xml index 5cf27df..4579d84 100644 --- a/src/main/resources/mapper/IntentionMapper.xml +++ b/src/main/resources/mapper/IntentionMapper.xml @@ -9,12 +9,13 @@ + - id,digest,desc, + id,digest,desc,generation_type, domain_category_id,create_time,update_time