diff --git a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java index 47973f9..5e2d485 100644 --- a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java +++ b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java @@ -207,27 +207,26 @@ public class PromptCache { 1. **命名规范**: - 使用`UpperCamelCase`命名实体(如`ProductCategory`) - - 使用`SCREAMING_SNAKE_CASE`命名关系(如`IS_RELATED_TO`) - - 保留数字原样(如`2023`→`2023`) - - 禁止特殊字符(如空格、括号、引号等) + - 禁止特殊字符(如空格、括号、引号、换行符等) - 优先选择技术领域通用术语 2. **转换规则**: - 直译或意译均可,但需确保语义清晰 - 若中文含多义词,选择最贴近技术场景的译法 - - 对品牌/专有名词保留原始英文(如"腾讯"→`Tencent`) + - 对品牌/专有名词保留原始英文(如"腾讯"→ Tencent) 3. **输入输出示例**: - - 输入: "用户订单" → 输出: `UserOrder`(实体) - - 输入: "属于2023年" → 输出: `BELONGS_TO_2023`(关系) - - 输入: "5G网络设备" → 输出: `5GNetworkDevice`(实体) - - 输入: "评分大于90" → 输出: `SCORE_ABOVE_90`(关系) + - 输入: "用户订单" → 输出: UserOrder + - 输入: "属于2023年" → 输出: BELONGS_TO_2023 + - 输入: "5G网络设备" → 输出: 5GNetworkDevice + - 输入: "评分大于90" → 输出: SCORE_ABOVE_90 4. **待转换文本**: {} 5. **输出要求**: - 只需返回转换后的英文名称,无需解释。 + - 不要使用``````等任何Markdown标记包装 + - 只需返回转换后的英文名称,无需解释。 """; diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java index ced3840..9c5b9c1 100644 --- a/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java +++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java @@ -27,7 +27,7 @@ public class TruncationErAttribute implements Serializable { /** * 类型 0:terId关联的id为实体 1:terId关联的id为关系 */ - private String type; + private String associationType; /** * 实体名 diff --git a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java index 8bdd12d..267e676 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java @@ -19,13 +19,15 @@ public class ERAttributeDTO { /** * 类型 0:terId关联的id为实体 1:terId关联的id为关系 */ - private String type; + private String associationType; /** * 属性名 */ private String attribute; + private String attributeEn; + /** * 属性值 */ @@ -48,7 +50,7 @@ public class ERAttributeDTO { public TruncationErAttribute toTruncationErAttribute() { TruncationErAttribute truncationErAttribute = new TruncationErAttribute(); truncationErAttribute.setTerId(this.terId); - truncationErAttribute.setType(this.type); + truncationErAttribute.setAssociationType(this.associationType); truncationErAttribute.setAttribute(this.attribute); truncationErAttribute.setValue(this.value); truncationErAttribute.setDataType(this.dataType); diff --git a/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java index 4475cf3..b423d19 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java @@ -46,7 +46,7 @@ public class EREDTO { erAttributeDTOS.add(erAttributeDTO); } } - EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,name,type, erAttributeDTOS); + EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,type,name, erAttributeDTOS); entities.add(entityExtraction); } } @@ -57,32 +57,32 @@ public class EREDTO { String target = relationJson.getString("target"); String type = relationJson.getString("type"); JSONObject attributes = relationJson.getJSONObject("attributes"); + List erAttributeDTOS = new ArrayList<>(); if (CollUtil.isNotEmpty(attributes)){ - List erAttributeDTOS = new ArrayList<>(); for (String key : attributes.keySet()) { Object value = attributes.get(key); String valueString = attributes.getString(key); ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); erAttributeDTOS.add(erAttributeDTO); } - if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){ - log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson); - continue; - } - Optional sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), source)).findFirst(); - if (sourceTypeOpt.isEmpty()){ - log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson); - continue; - } - Optional targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), target)).findFirst(); - if (targetTypeOpt.isEmpty()){ - log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson); - continue; - } - RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncationId,source, - sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), erAttributeDTOS); - relationsList.add(relationExtractionDTO); } + if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){ + log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson); + continue; + } + Optional sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), source)).findFirst(); + if (sourceTypeOpt.isEmpty()){ + log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson); + continue; + } + Optional targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), target)).findFirst(); + if (targetTypeOpt.isEmpty()){ + log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson); + continue; + } + RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncationId,source, + sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), erAttributeDTOS); + relationsList.add(relationExtractionDTO); } } eredto.setEntities(entities); @@ -107,7 +107,7 @@ public class EREDTO { } EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO(); entityExtractionDTO.setEntity("行"); - entityExtractionDTO.setName("row"); + entityExtractionDTO.setName("行"); entityExtractionDTO.setTruncationId(truncationId); List erAttributeDTOS = new ArrayList<>(); for (Map.Entry tableEntry : tableJson.entrySet()) { @@ -131,12 +131,41 @@ public class EREDTO { String entityName = entity.getEntity(); Optional first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), entityName)).findFirst(); first.ifPresent(chineseEnglishWords -> entity.setEntityEn(chineseEnglishWords.getEnglishWord())); + + if (CollUtil.isNotEmpty(entity.getAttributes())){ + for (ERAttributeDTO attribute : entity.getAttributes()) { + setAttributeEn(attribute, wordsList); + } + } } for (RelationExtractionDTO relation : relations) { String relationName = relation.getRelation(); Optional first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), relationName)).findFirst(); first.ifPresent(chineseEnglishWords -> relation.setRelationEn(chineseEnglishWords.getEnglishWord())); + + String sourceType = relation.getSourceType(); + Optional sourceTypeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), sourceType)).findFirst(); + sourceTypeFirst.ifPresent(chineseEnglishWords -> relation.setSourceTypeEn(chineseEnglishWords.getEnglishWord())); + + String targetType = relation.getTargetType(); + Optional targetTypeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), targetType)).findFirst(); + targetTypeFirst.ifPresent(chineseEnglishWords -> relation.setTargetTypeEn(chineseEnglishWords.getEnglishWord())); + + if (CollUtil.isNotEmpty(relation.getAttributes())){ + for (ERAttributeDTO attribute : relation.getAttributes()) { + setAttributeEn(attribute, wordsList); + } + } } } + + private void setAttributeEn(ERAttributeDTO attribute,List wordsList) { + if (null == attribute || CollUtil.isEmpty(wordsList)){ + return; + } + String attributeName = attribute.getAttribute(); + Optional attributeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), attributeName)).findFirst(); + attributeFirst.ifPresent(chineseEnglishWords -> attribute.setAttributeEn(chineseEnglishWords.getEnglishWord())); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java index 6a10af6..7d3f88d 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java @@ -89,7 +89,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { domainMetadataService.saveIfNotExists(domainMetadata); } } - log.info("保存领域元数据完成"); + log.info("保存领域元数据完成...."); // 保存字典 log.info("开始保存字典..."); @@ -100,12 +100,22 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { if (CollUtil.isNotEmpty(entities)){ for (EntityExtractionDTO entityDTO : entities) { saveWordsIfNecessary(entityDTO.getEntity(), allWords); + if (CollUtil.isNotEmpty(entityDTO.getAttributes())){ + for (ERAttributeDTO attribute : entityDTO.getAttributes()) { + saveWordsIfNecessary(attribute.getAttribute(), allWords); + } + } } } List relations = eredto.getRelations(); if (CollUtil.isNotEmpty(relations)){ for (RelationExtractionDTO relationDTO : relations) { saveWordsIfNecessary(relationDTO.getRelation(), allWords); + if (CollUtil.isNotEmpty(relationDTO.getAttributes())){ + for (ERAttributeDTO attribute : relationDTO.getAttributes()) { + saveWordsIfNecessary(attribute.getAttribute(), allWords); + } + } } } } @@ -128,7 +138,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { } String generate = chinesEsToEnglishGenerator.generate(word); if (StrUtil.isEmpty(generate)){ - log.info("生成英文名称失败,entity:{}", word); + log.warn("生成英文名称失败,entity:{}", word); return; } ChineseEnglishWords words = new ChineseEnglishWords(); diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java index d18ebc6..7690530 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java @@ -15,6 +15,7 @@ import org.springframework.ai.ollama.OllamaChatModel; import org.springframework.stereotype.Service; import java.util.*; +import java.util.stream.Collectors; @Slf4j @Service @@ -102,15 +103,15 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { log.info("doTableEre响应结果:{}", response); // todo:暂时不去处理异常返回 EREDTO eredto = EREDTO.fromTableJson(response, truncateDTO.getId()); + // 手动设置表格标题 EntityExtractionDTO titleEntity = new EntityExtractionDTO(); titleEntity.setEntity("表"); titleEntity.setName(truncateDTO.getTitle()); - // // 添加关系 - ArrayList relations = new ArrayList<>(); + List relations = new ArrayList<>(); for (EntityExtractionDTO entity : eredto.getEntities()) { RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncateDTO.getId(), - titleEntity.getEntity(), titleEntity.getName(), "包含", entity.getEntity(), entity.getName(), entity.getAttributes()); + titleEntity.getName(), titleEntity.getEntity(), "包含", entity.getName(), entity.getEntity(), entity.getAttributes()); relations.add(relationExtractionDTO); } eredto.getEntities().add(titleEntity); @@ -120,6 +121,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { /** * 合并实体关系抽取结果 主要是对实体和关系中的属性进行合并 + * 表不参与合并 * @param eredtoList 实体关系抽取结果列表 * @return */ @@ -129,6 +131,13 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { if (CollUtil.isEmpty(eredtoList)){ return merged; } + // 将表单独拿出来 + merged = eredtoList.stream().filter(ere-> + ere.getEntities().stream().anyMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList()); + + // 把剩下的数据进行合并计算 + eredtoList = eredtoList.stream().filter(ere-> + ere.getEntities().stream().noneMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList()); Map entityMap = new HashMap<>(); Map relationMap = new HashMap<>(); for (EREDTO eredto : eredtoList) { @@ -153,12 +162,12 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { Set relationEntityKey = new HashSet<>(); for (Map.Entry relationEntry : relationMap.entrySet()) { RelationExtractionDTO value = relationEntry.getValue(); - EntityExtractionDTO sourceEntity = entityMap.get(StrUtil.join("_", value.getSourceType(), value.getSource())); + EntityExtractionDTO sourceEntity = entityMap.get(StrUtil.join("_",value.getSourceType(), value.getSource())); if (null == sourceEntity){ log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到头节点映射关系", value.getSourceType(), value.getSource()); continue; } - EntityExtractionDTO targetEntity = entityMap.get(StrUtil.join("_", value.getTargetType(), value.getTarget())); + EntityExtractionDTO targetEntity = entityMap.get(StrUtil.join("_", value.getTargetType(),value.getTarget())); if (null == targetEntity){ log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到尾节点映射关系", value.getTargetType(), value.getTarget()); continue; @@ -184,17 +193,17 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { private void mergeAttribute(Map entityMap,RelationExtractionDTO relation, String key) { - RelationExtractionDTO cachedEntity = entityMap.get(key); - if (null == cachedEntity){ + RelationExtractionDTO cachedRelation = entityMap.get(key); + if (null == cachedRelation){ entityMap.put(key, relation); }else { if (CollUtil.isEmpty(relation.getAttributes())){ return; } // 合并属性 - List attributes = relation.getAttributes(); - if (null == attributes){ - attributes = new ArrayList<>(); + List cachedAttributes = cachedRelation.getAttributes(); + if (null == cachedAttributes){ + cachedAttributes = new ArrayList<>(); } for (ERAttributeDTO attribute : relation.getAttributes()) { String attributeKey = attribute.getAttribute(); @@ -203,8 +212,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { continue; } // 如果属性已经存在,则不添加 - if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) { - attributes.add(attribute); + if (cachedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) { + cachedAttributes.add(attribute); } } } @@ -219,9 +228,10 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { return; } // 合并属性 - List attributes = entity.getAttributes(); - if (null == attributes){ - attributes = new ArrayList<>(); + List cachedAttributes = cachedEntity.getAttributes(); + if (null == cachedAttributes){ + cachedAttributes = new ArrayList<>(); + cachedEntity.setAttributes(cachedAttributes); } for (ERAttributeDTO attribute : entity.getAttributes()) { String attributeKey = attribute.getAttribute(); @@ -230,8 +240,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { continue; } // 如果属性已经存在,则不添加 - if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) { - attributes.add(attribute); + if (cachedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) { + cachedAttributes.add(attribute); } } } @@ -242,6 +252,6 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { } private String generateRelationMapKey(RelationExtractionDTO relationExtractionDTO) { - return relationExtractionDTO.getSource() + "_" + relationExtractionDTO.getTarget() + "_" + relationExtractionDTO.getRelation(); + return relationExtractionDTO.getSource()+ "_" + relationExtractionDTO.getRelation() + "_" + relationExtractionDTO.getTarget(); } } diff --git a/src/main/resources/mapper/TruncationErAttributeMapper.xml b/src/main/resources/mapper/TruncationErAttributeMapper.xml index 894cebe..d120575 100644 --- a/src/main/resources/mapper/TruncationErAttributeMapper.xml +++ b/src/main/resources/mapper/TruncationErAttributeMapper.xml @@ -7,7 +7,7 @@ - + @@ -16,7 +16,7 @@ - id,ter_id,type, + id,ter_id,associationType, attribute,value,data_type, create_time,update_time