generateGraph 功能初始化

master
xueqingkun 2 months ago
parent 76e9d05a7b
commit c97248db48

@ -207,27 +207,26 @@ public class PromptCache {
1. **** 1. ****
- 使`UpperCamelCase``ProductCategory` - 使`UpperCamelCase``ProductCategory`
- 使`SCREAMING_SNAKE_CASE``IS_RELATED_TO` -
- `2023``2023`
-
- -
2. **** 2. ****
- -
- -
- /"腾讯"`Tencent` - /"腾讯" Tencent
3. **** 3. ****
- : "用户订单" : `UserOrder` - : "用户订单" : UserOrder
- : "属于2023年" : `BELONGS_TO_2023` - : "属于2023年" : BELONGS_TO_2023
- : "5G网络设备" : `5GNetworkDevice` - : "5G网络设备" : 5GNetworkDevice
- : "评分大于90" : `SCORE_ABOVE_90` - : "评分大于90" : SCORE_ABOVE_90
4. **** 4. ****
{} {}
5. **** 5. ****
- 使``````Markdown
-
"""; """;

@ -27,7 +27,7 @@ public class TruncationErAttribute implements Serializable {
/** /**
* 0terIdid 1terIdid * 0terIdid 1terIdid
*/ */
private String type; private String associationType;
/** /**
* *

@ -19,13 +19,15 @@ public class ERAttributeDTO {
/** /**
* 0terIdid 1terIdid * 0terIdid 1terIdid
*/ */
private String type; private String associationType;
/** /**
* *
*/ */
private String attribute; private String attribute;
private String attributeEn;
/** /**
* *
*/ */
@ -48,7 +50,7 @@ public class ERAttributeDTO {
public TruncationErAttribute toTruncationErAttribute() { public TruncationErAttribute toTruncationErAttribute() {
TruncationErAttribute truncationErAttribute = new TruncationErAttribute(); TruncationErAttribute truncationErAttribute = new TruncationErAttribute();
truncationErAttribute.setTerId(this.terId); truncationErAttribute.setTerId(this.terId);
truncationErAttribute.setType(this.type); truncationErAttribute.setAssociationType(this.associationType);
truncationErAttribute.setAttribute(this.attribute); truncationErAttribute.setAttribute(this.attribute);
truncationErAttribute.setValue(this.value); truncationErAttribute.setValue(this.value);
truncationErAttribute.setDataType(this.dataType); truncationErAttribute.setDataType(this.dataType);

@ -46,7 +46,7 @@ public class EREDTO {
erAttributeDTOS.add(erAttributeDTO); erAttributeDTOS.add(erAttributeDTO);
} }
} }
EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,name,type, erAttributeDTOS); EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,type,name, erAttributeDTOS);
entities.add(entityExtraction); entities.add(entityExtraction);
} }
} }
@ -57,32 +57,32 @@ public class EREDTO {
String target = relationJson.getString("target"); String target = relationJson.getString("target");
String type = relationJson.getString("type"); String type = relationJson.getString("type");
JSONObject attributes = relationJson.getJSONObject("attributes"); JSONObject attributes = relationJson.getJSONObject("attributes");
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
if (CollUtil.isNotEmpty(attributes)){ if (CollUtil.isNotEmpty(attributes)){
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
for (String key : attributes.keySet()) { for (String key : attributes.keySet()) {
Object value = attributes.get(key); Object value = attributes.get(key);
String valueString = attributes.getString(key); String valueString = attributes.getString(key);
ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0");
erAttributeDTOS.add(erAttributeDTO); erAttributeDTOS.add(erAttributeDTO);
} }
if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){
log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), source)).findFirst();
if (sourceTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), target)).findFirst();
if (targetTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson);
continue;
}
RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncationId,source,
sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), erAttributeDTOS);
relationsList.add(relationExtractionDTO);
} }
if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){
log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), source)).findFirst();
if (sourceTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), target)).findFirst();
if (targetTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson);
continue;
}
RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncationId,source,
sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), erAttributeDTOS);
relationsList.add(relationExtractionDTO);
} }
} }
eredto.setEntities(entities); eredto.setEntities(entities);
@ -107,7 +107,7 @@ public class EREDTO {
} }
EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO(); EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO();
entityExtractionDTO.setEntity("行"); entityExtractionDTO.setEntity("行");
entityExtractionDTO.setName("row"); entityExtractionDTO.setName("");
entityExtractionDTO.setTruncationId(truncationId); entityExtractionDTO.setTruncationId(truncationId);
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>(); List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) { for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) {
@ -131,12 +131,41 @@ public class EREDTO {
String entityName = entity.getEntity(); String entityName = entity.getEntity();
Optional<ChineseEnglishWords> first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), entityName)).findFirst(); Optional<ChineseEnglishWords> first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), entityName)).findFirst();
first.ifPresent(chineseEnglishWords -> entity.setEntityEn(chineseEnglishWords.getEnglishWord())); first.ifPresent(chineseEnglishWords -> entity.setEntityEn(chineseEnglishWords.getEnglishWord()));
if (CollUtil.isNotEmpty(entity.getAttributes())){
for (ERAttributeDTO attribute : entity.getAttributes()) {
setAttributeEn(attribute, wordsList);
}
}
} }
for (RelationExtractionDTO relation : relations) { for (RelationExtractionDTO relation : relations) {
String relationName = relation.getRelation(); String relationName = relation.getRelation();
Optional<ChineseEnglishWords> first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), relationName)).findFirst(); Optional<ChineseEnglishWords> first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), relationName)).findFirst();
first.ifPresent(chineseEnglishWords -> relation.setRelationEn(chineseEnglishWords.getEnglishWord())); first.ifPresent(chineseEnglishWords -> relation.setRelationEn(chineseEnglishWords.getEnglishWord()));
String sourceType = relation.getSourceType();
Optional<ChineseEnglishWords> sourceTypeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), sourceType)).findFirst();
sourceTypeFirst.ifPresent(chineseEnglishWords -> relation.setSourceTypeEn(chineseEnglishWords.getEnglishWord()));
String targetType = relation.getTargetType();
Optional<ChineseEnglishWords> targetTypeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), targetType)).findFirst();
targetTypeFirst.ifPresent(chineseEnglishWords -> relation.setTargetTypeEn(chineseEnglishWords.getEnglishWord()));
if (CollUtil.isNotEmpty(relation.getAttributes())){
for (ERAttributeDTO attribute : relation.getAttributes()) {
setAttributeEn(attribute, wordsList);
}
}
} }
} }
/**
 * Fills in the English name of a single attribute from the translation word list.
 *
 * <p>The attribute's name ({@code getAttribute()}) is compared, via {@code StrUtil.equals},
 * against each entry's {@code getChineseWord()}; the first matching entry's
 * {@code getEnglishWord()} is written to the attribute with {@code setAttributeEn}.
 * When no entry matches, the attribute is left unchanged.
 *
 * @param attribute the attribute to update; ignored when {@code null}
 * @param wordsList candidate translations; ignored when {@code CollUtil.isEmpty} reports it empty
 */
private void setAttributeEn(ERAttributeDTO attribute,List<ChineseEnglishWords> wordsList) {
    boolean nothingToDo = attribute == null || CollUtil.isEmpty(wordsList);
    if (!nothingToDo) {
        String wanted = attribute.getAttribute();
        for (ChineseEnglishWords candidate : wordsList) {
            if (StrUtil.equals(candidate.getChineseWord(), wanted)) {
                // Only the first matching entry is used; later matches are ignored.
                attribute.setAttributeEn(candidate.getEnglishWord());
                break;
            }
        }
    }
}
} }

@ -89,7 +89,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
domainMetadataService.saveIfNotExists(domainMetadata); domainMetadataService.saveIfNotExists(domainMetadata);
} }
} }
log.info("保存领域元数据完成"); log.info("保存领域元数据完成....");
// 保存字典 // 保存字典
log.info("开始保存字典..."); log.info("开始保存字典...");
@ -100,12 +100,22 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
if (CollUtil.isNotEmpty(entities)){ if (CollUtil.isNotEmpty(entities)){
for (EntityExtractionDTO entityDTO : entities) { for (EntityExtractionDTO entityDTO : entities) {
saveWordsIfNecessary(entityDTO.getEntity(), allWords); saveWordsIfNecessary(entityDTO.getEntity(), allWords);
if (CollUtil.isNotEmpty(entityDTO.getAttributes())){
for (ERAttributeDTO attribute : entityDTO.getAttributes()) {
saveWordsIfNecessary(attribute.getAttribute(), allWords);
}
}
} }
} }
List<RelationExtractionDTO> relations = eredto.getRelations(); List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isNotEmpty(relations)){ if (CollUtil.isNotEmpty(relations)){
for (RelationExtractionDTO relationDTO : relations) { for (RelationExtractionDTO relationDTO : relations) {
saveWordsIfNecessary(relationDTO.getRelation(), allWords); saveWordsIfNecessary(relationDTO.getRelation(), allWords);
if (CollUtil.isNotEmpty(relationDTO.getAttributes())){
for (ERAttributeDTO attribute : relationDTO.getAttributes()) {
saveWordsIfNecessary(attribute.getAttribute(), allWords);
}
}
} }
} }
} }
@ -128,7 +138,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
} }
String generate = chinesEsToEnglishGenerator.generate(word); String generate = chinesEsToEnglishGenerator.generate(word);
if (StrUtil.isEmpty(generate)){ if (StrUtil.isEmpty(generate)){
log.info("生成英文名称失败entity:{}", word); log.warn("生成英文名称失败entity:{}", word);
return; return;
} }
ChineseEnglishWords words = new ChineseEnglishWords(); ChineseEnglishWords words = new ChineseEnglishWords();

@ -15,6 +15,7 @@ import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.*; import java.util.*;
import java.util.stream.Collectors;
@Slf4j @Slf4j
@Service @Service
@ -102,15 +103,15 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
log.info("doTableEre响应结果:{}", response); log.info("doTableEre响应结果:{}", response);
// todo:暂时不去处理异常返回 // todo:暂时不去处理异常返回
EREDTO eredto = EREDTO.fromTableJson(response, truncateDTO.getId()); EREDTO eredto = EREDTO.fromTableJson(response, truncateDTO.getId());
// 手动设置表格标题
EntityExtractionDTO titleEntity = new EntityExtractionDTO(); EntityExtractionDTO titleEntity = new EntityExtractionDTO();
titleEntity.setEntity("表"); titleEntity.setEntity("表");
titleEntity.setName(truncateDTO.getTitle()); titleEntity.setName(truncateDTO.getTitle());
//
// 添加关系 // 添加关系
ArrayList<RelationExtractionDTO> relations = new ArrayList<>(); List<RelationExtractionDTO> relations = new ArrayList<>();
for (EntityExtractionDTO entity : eredto.getEntities()) { for (EntityExtractionDTO entity : eredto.getEntities()) {
RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncateDTO.getId(), RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncateDTO.getId(),
titleEntity.getEntity(), titleEntity.getName(), "包含", entity.getEntity(), entity.getName(), entity.getAttributes()); titleEntity.getName(), titleEntity.getEntity(), "包含", entity.getName(), entity.getEntity(), entity.getAttributes());
relations.add(relationExtractionDTO); relations.add(relationExtractionDTO);
} }
eredto.getEntities().add(titleEntity); eredto.getEntities().add(titleEntity);
@ -120,6 +121,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
/** /**
* *
*
* @param eredtoList * @param eredtoList
* @return * @return
*/ */
@ -129,6 +131,13 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (CollUtil.isEmpty(eredtoList)){ if (CollUtil.isEmpty(eredtoList)){
return merged; return merged;
} }
// 将表单独拿出来
merged = eredtoList.stream().filter(ere->
ere.getEntities().stream().anyMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
// 把剩下的数据进行合并计算
eredtoList = eredtoList.stream().filter(ere->
ere.getEntities().stream().noneMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
Map<String, EntityExtractionDTO> entityMap = new HashMap<>(); Map<String, EntityExtractionDTO> entityMap = new HashMap<>();
Map<String, RelationExtractionDTO> relationMap = new HashMap<>(); Map<String, RelationExtractionDTO> relationMap = new HashMap<>();
for (EREDTO eredto : eredtoList) { for (EREDTO eredto : eredtoList) {
@ -153,12 +162,12 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
Set<String> relationEntityKey = new HashSet<>(); Set<String> relationEntityKey = new HashSet<>();
for (Map.Entry<String, RelationExtractionDTO> relationEntry : relationMap.entrySet()) { for (Map.Entry<String, RelationExtractionDTO> relationEntry : relationMap.entrySet()) {
RelationExtractionDTO value = relationEntry.getValue(); RelationExtractionDTO value = relationEntry.getValue();
EntityExtractionDTO sourceEntity = entityMap.get(StrUtil.join("_", value.getSourceType(), value.getSource())); EntityExtractionDTO sourceEntity = entityMap.get(StrUtil.join("_",value.getSourceType(), value.getSource()));
if (null == sourceEntity){ if (null == sourceEntity){
log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到头节点映射关系", value.getSourceType(), value.getSource()); log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到头节点映射关系", value.getSourceType(), value.getSource());
continue; continue;
} }
EntityExtractionDTO targetEntity = entityMap.get(StrUtil.join("_", value.getTargetType(), value.getTarget())); EntityExtractionDTO targetEntity = entityMap.get(StrUtil.join("_", value.getTargetType(),value.getTarget()));
if (null == targetEntity){ if (null == targetEntity){
log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到尾节点映射关系", value.getTargetType(), value.getTarget()); log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到尾节点映射关系", value.getTargetType(), value.getTarget());
continue; continue;
@ -184,17 +193,17 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
private void mergeAttribute(Map<String, RelationExtractionDTO> entityMap,RelationExtractionDTO relation, String key) { private void mergeAttribute(Map<String, RelationExtractionDTO> entityMap,RelationExtractionDTO relation, String key) {
RelationExtractionDTO cachedEntity = entityMap.get(key); RelationExtractionDTO cachedRelation = entityMap.get(key);
if (null == cachedEntity){ if (null == cachedRelation){
entityMap.put(key, relation); entityMap.put(key, relation);
}else { }else {
if (CollUtil.isEmpty(relation.getAttributes())){ if (CollUtil.isEmpty(relation.getAttributes())){
return; return;
} }
// 合并属性 // 合并属性
List<ERAttributeDTO> attributes = relation.getAttributes(); List<ERAttributeDTO> cachedAttributes = cachedRelation.getAttributes();
if (null == attributes){ if (null == cachedAttributes){
attributes = new ArrayList<>(); cachedAttributes = new ArrayList<>();
} }
for (ERAttributeDTO attribute : relation.getAttributes()) { for (ERAttributeDTO attribute : relation.getAttributes()) {
String attributeKey = attribute.getAttribute(); String attributeKey = attribute.getAttribute();
@ -203,8 +212,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
continue; continue;
} }
// 如果属性已经存在,则不添加 // 如果属性已经存在,则不添加
if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) { if (cachedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
attributes.add(attribute); cachedAttributes.add(attribute);
} }
} }
} }
@ -219,9 +228,10 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
return; return;
} }
// 合并属性 // 合并属性
List<ERAttributeDTO> attributes = entity.getAttributes(); List<ERAttributeDTO> cachedAttributes = cachedEntity.getAttributes();
if (null == attributes){ if (null == cachedAttributes){
attributes = new ArrayList<>(); cachedAttributes = new ArrayList<>();
cachedEntity.setAttributes(cachedAttributes);
} }
for (ERAttributeDTO attribute : entity.getAttributes()) { for (ERAttributeDTO attribute : entity.getAttributes()) {
String attributeKey = attribute.getAttribute(); String attributeKey = attribute.getAttribute();
@ -230,8 +240,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
continue; continue;
} }
// 如果属性已经存在,则不添加 // 如果属性已经存在,则不添加
if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) { if (cachedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
attributes.add(attribute); cachedAttributes.add(attribute);
} }
} }
} }
@ -242,6 +252,6 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
} }
private String generateRelationMapKey(RelationExtractionDTO relationExtractionDTO) { private String generateRelationMapKey(RelationExtractionDTO relationExtractionDTO) {
return relationExtractionDTO.getSource() + "_" + relationExtractionDTO.getTarget() + "_" + relationExtractionDTO.getRelation(); return relationExtractionDTO.getSource()+ "_" + relationExtractionDTO.getRelation() + "_" + relationExtractionDTO.getTarget();
} }
} }

@ -7,7 +7,7 @@
<resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.TruncationErAttribute"> <resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.TruncationErAttribute">
<id property="id" column="id" jdbcType="VARCHAR"/> <id property="id" column="id" jdbcType="VARCHAR"/>
<result property="terId" column="ter_id" jdbcType="VARCHAR"/> <result property="terId" column="ter_id" jdbcType="VARCHAR"/>
<result property="type" column="type" jdbcType="VARCHAR"/> <result property="associationType" column="association_type" jdbcType="VARCHAR"/>
<result property="attribute" column="attribute" jdbcType="VARCHAR"/> <result property="attribute" column="attribute" jdbcType="VARCHAR"/>
<result property="value" column="value" jdbcType="VARCHAR"/> <result property="value" column="value" jdbcType="VARCHAR"/>
<result property="dataType" column="data_type" jdbcType="VARCHAR"/> <result property="dataType" column="data_type" jdbcType="VARCHAR"/>
@ -16,7 +16,7 @@
</resultMap> </resultMap>
<sql id="Base_Column_List"> <sql id="Base_Column_List">
id,ter_id,type, id,ter_id,association_type,
attribute,value,data_type, attribute,value,data_type,
create_time,update_time create_time,update_time
</sql> </sql>

Loading…
Cancel
Save