generateGraph 功能初始化

master
xueqingkun 2 months ago
parent 76e9d05a7b
commit c97248db48

@ -207,27 +207,26 @@ public class PromptCache {
1. ****
- 使`UpperCamelCase``ProductCategory`
- 使`SCREAMING_SNAKE_CASE``IS_RELATED_TO`
- `2023``2023`
-
-
-
2. ****
-
-
- /"腾讯"`Tencent`
- /"腾讯" Tencent
3. ****
- : "用户订单" : `UserOrder`
- : "属于2023年" : `BELONGS_TO_2023`
- : "5G网络设备" : `5GNetworkDevice`
- : "评分大于90" : `SCORE_ABOVE_90`
- : "用户订单" : UserOrder
- : "属于2023年" : BELONGS_TO_2023
- : "5G网络设备" : 5GNetworkDevice
- : "评分大于90" : SCORE_ABOVE_90
4. ****
{}
5. ****
- 使``````Markdown
-
""";

@ -27,7 +27,7 @@ public class TruncationErAttribute implements Serializable {
/**
* 0terIdid 1terIdid
*/
private String type;
private String associationType;
/**
*

@ -19,13 +19,15 @@ public class ERAttributeDTO {
/**
* 0terIdid 1terIdid
*/
private String type;
private String associationType;
/**
*
*/
private String attribute;
private String attributeEn;
/**
*
*/
@ -48,7 +50,7 @@ public class ERAttributeDTO {
public TruncationErAttribute toTruncationErAttribute() {
TruncationErAttribute truncationErAttribute = new TruncationErAttribute();
truncationErAttribute.setTerId(this.terId);
truncationErAttribute.setType(this.type);
truncationErAttribute.setAssociationType(this.associationType);
truncationErAttribute.setAttribute(this.attribute);
truncationErAttribute.setValue(this.value);
truncationErAttribute.setDataType(this.dataType);

@ -46,7 +46,7 @@ public class EREDTO {
erAttributeDTOS.add(erAttributeDTO);
}
}
EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,name,type, erAttributeDTOS);
EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,type,name, erAttributeDTOS);
entities.add(entityExtraction);
}
}
@ -57,24 +57,25 @@ public class EREDTO {
String target = relationJson.getString("target");
String type = relationJson.getString("type");
JSONObject attributes = relationJson.getJSONObject("attributes");
if (CollUtil.isNotEmpty(attributes)){
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
if (CollUtil.isNotEmpty(attributes)){
for (String key : attributes.keySet()) {
Object value = attributes.get(key);
String valueString = attributes.getString(key);
ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0");
erAttributeDTOS.add(erAttributeDTO);
}
}
if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){
log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), source)).findFirst();
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), source)).findFirst();
if (sourceTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), target)).findFirst();
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), target)).findFirst();
if (targetTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson);
continue;
@ -84,7 +85,6 @@ public class EREDTO {
relationsList.add(relationExtractionDTO);
}
}
}
eredto.setEntities(entities);
eredto.setRelations(relationsList);
return eredto;
@ -107,7 +107,7 @@ public class EREDTO {
}
EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO();
entityExtractionDTO.setEntity("行");
entityExtractionDTO.setName("row");
entityExtractionDTO.setName("");
entityExtractionDTO.setTruncationId(truncationId);
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) {
@ -131,12 +131,41 @@ public class EREDTO {
String entityName = entity.getEntity();
Optional<ChineseEnglishWords> first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), entityName)).findFirst();
first.ifPresent(chineseEnglishWords -> entity.setEntityEn(chineseEnglishWords.getEnglishWord()));
if (CollUtil.isNotEmpty(entity.getAttributes())){
for (ERAttributeDTO attribute : entity.getAttributes()) {
setAttributeEn(attribute, wordsList);
}
}
}
for (RelationExtractionDTO relation : relations) {
String relationName = relation.getRelation();
Optional<ChineseEnglishWords> first = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), relationName)).findFirst();
first.ifPresent(chineseEnglishWords -> relation.setRelationEn(chineseEnglishWords.getEnglishWord()));
String sourceType = relation.getSourceType();
Optional<ChineseEnglishWords> sourceTypeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), sourceType)).findFirst();
sourceTypeFirst.ifPresent(chineseEnglishWords -> relation.setSourceTypeEn(chineseEnglishWords.getEnglishWord()));
String targetType = relation.getTargetType();
Optional<ChineseEnglishWords> targetTypeFirst = wordsList.stream().filter(w -> StrUtil.equals(w.getChineseWord(), targetType)).findFirst();
targetTypeFirst.ifPresent(chineseEnglishWords -> relation.setTargetTypeEn(chineseEnglishWords.getEnglishWord()));
if (CollUtil.isNotEmpty(relation.getAttributes())){
for (ERAttributeDTO attribute : relation.getAttributes()) {
setAttributeEn(attribute, wordsList);
}
}
}
}
/**
 * Copies the English word of the first dictionary entry whose Chinese word equals the
 * attribute's name onto the attribute.
 *
 * <p>Does nothing when the attribute is {@code null}, the word list is empty, or no entry matches.
 *
 * @param attribute the attribute whose English name should be filled in
 * @param wordsList the Chinese/English dictionary entries to search, in order
 */
private void setAttributeEn(ERAttributeDTO attribute,List<ChineseEnglishWords> wordsList) {
    if (attribute != null && CollUtil.isNotEmpty(wordsList)) {
        String chineseName = attribute.getAttribute();
        for (ChineseEnglishWords candidate : wordsList) {
            if (StrUtil.equals(candidate.getChineseWord(), chineseName)) {
                // Only the first matching entry is applied.
                attribute.setAttributeEn(candidate.getEnglishWord());
                break;
            }
        }
    }
}
}

@ -89,7 +89,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
domainMetadataService.saveIfNotExists(domainMetadata);
}
}
log.info("保存领域元数据完成");
log.info("保存领域元数据完成....");
// 保存字典
log.info("开始保存字典...");
@ -100,12 +100,22 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
if (CollUtil.isNotEmpty(entities)){
for (EntityExtractionDTO entityDTO : entities) {
saveWordsIfNecessary(entityDTO.getEntity(), allWords);
if (CollUtil.isNotEmpty(entityDTO.getAttributes())){
for (ERAttributeDTO attribute : entityDTO.getAttributes()) {
saveWordsIfNecessary(attribute.getAttribute(), allWords);
}
}
}
}
List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isNotEmpty(relations)){
for (RelationExtractionDTO relationDTO : relations) {
saveWordsIfNecessary(relationDTO.getRelation(), allWords);
if (CollUtil.isNotEmpty(relationDTO.getAttributes())){
for (ERAttributeDTO attribute : relationDTO.getAttributes()) {
saveWordsIfNecessary(attribute.getAttribute(), allWords);
}
}
}
}
}
@ -128,7 +138,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
}
String generate = chinesEsToEnglishGenerator.generate(word);
if (StrUtil.isEmpty(generate)){
log.info("生成英文名称失败entity:{}", word);
log.warn("生成英文名称失败entity:{}", word);
return;
}
ChineseEnglishWords words = new ChineseEnglishWords();

@ -15,6 +15,7 @@ import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
@Service
@ -102,15 +103,15 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
log.info("doTableEre响应结果:{}", response);
// todo:暂时不去处理异常返回
EREDTO eredto = EREDTO.fromTableJson(response, truncateDTO.getId());
// 手动设置表格标题
EntityExtractionDTO titleEntity = new EntityExtractionDTO();
titleEntity.setEntity("表");
titleEntity.setName(truncateDTO.getTitle());
//
// 添加关系
ArrayList<RelationExtractionDTO> relations = new ArrayList<>();
List<RelationExtractionDTO> relations = new ArrayList<>();
for (EntityExtractionDTO entity : eredto.getEntities()) {
RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncateDTO.getId(),
titleEntity.getEntity(), titleEntity.getName(), "包含", entity.getEntity(), entity.getName(), entity.getAttributes());
titleEntity.getName(), titleEntity.getEntity(), "包含", entity.getName(), entity.getEntity(), entity.getAttributes());
relations.add(relationExtractionDTO);
}
eredto.getEntities().add(titleEntity);
@ -120,6 +121,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
/**
*
*
* @param eredtoList
* @return
*/
@ -129,6 +131,13 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (CollUtil.isEmpty(eredtoList)){
return merged;
}
// 将表单独拿出来
merged = eredtoList.stream().filter(ere->
ere.getEntities().stream().anyMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
// 把剩下的数据进行合并计算
eredtoList = eredtoList.stream().filter(ere->
ere.getEntities().stream().noneMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
Map<String, EntityExtractionDTO> entityMap = new HashMap<>();
Map<String, RelationExtractionDTO> relationMap = new HashMap<>();
for (EREDTO eredto : eredtoList) {
@ -184,17 +193,17 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
private void mergeAttribute(Map<String, RelationExtractionDTO> entityMap,RelationExtractionDTO relation, String key) {
RelationExtractionDTO cachedEntity = entityMap.get(key);
if (null == cachedEntity){
RelationExtractionDTO cachedRelation = entityMap.get(key);
if (null == cachedRelation){
entityMap.put(key, relation);
}else {
if (CollUtil.isEmpty(relation.getAttributes())){
return;
}
// 合并属性
List<ERAttributeDTO> attributes = relation.getAttributes();
if (null == attributes){
attributes = new ArrayList<>();
List<ERAttributeDTO> cachedAttributes = cachedRelation.getAttributes();
if (null == cachedAttributes){
cachedAttributes = new ArrayList<>();
}
for (ERAttributeDTO attribute : relation.getAttributes()) {
String attributeKey = attribute.getAttribute();
@ -203,8 +212,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
continue;
}
// 如果属性已经存在,则不添加
if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
attributes.add(attribute);
if (cachedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
cachedAttributes.add(attribute);
}
}
}
@ -219,9 +228,10 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
return;
}
// 合并属性
List<ERAttributeDTO> attributes = entity.getAttributes();
if (null == attributes){
attributes = new ArrayList<>();
List<ERAttributeDTO> cachedAttributes = cachedEntity.getAttributes();
if (null == cachedAttributes){
cachedAttributes = new ArrayList<>();
cachedEntity.setAttributes(cachedAttributes);
}
for (ERAttributeDTO attribute : entity.getAttributes()) {
String attributeKey = attribute.getAttribute();
@ -230,8 +240,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
continue;
}
// 如果属性已经存在,则不添加
if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
attributes.add(attribute);
if (cachedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
cachedAttributes.add(attribute);
}
}
}
@ -242,6 +252,6 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
}
/**
 * Builds a string key for a relation by joining its source, target and relation name
 * with {@code "_"} separators.
 *
 * @param relationExtractionDTO the relation to build the key for
 * @return the underscore-joined key
 */
private String generateRelationMapKey(RelationExtractionDTO relationExtractionDTO) {
// NOTE(review): two consecutive return statements with different orderings
// (source_target_relation vs. source_relation_target). The second one is unreachable
// as written; confirm which ordering is intended and keep only that one.
return relationExtractionDTO.getSource() + "_" + relationExtractionDTO.getTarget() + "_" + relationExtractionDTO.getRelation();
return relationExtractionDTO.getSource()+ "_" + relationExtractionDTO.getRelation() + "_" + relationExtractionDTO.getTarget();
}
}

@ -7,7 +7,7 @@
<resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.TruncationErAttribute">
<id property="id" column="id" jdbcType="VARCHAR"/>
<result property="terId" column="ter_id" jdbcType="VARCHAR"/>
<result property="type" column="type" jdbcType="VARCHAR"/>
<result property="associationType" column="association_type" jdbcType="VARCHAR"/>
<result property="attribute" column="attribute" jdbcType="VARCHAR"/>
<result property="value" column="value" jdbcType="VARCHAR"/>
<result property="dataType" column="data_type" jdbcType="VARCHAR"/>
@ -16,7 +16,7 @@
</resultMap>
<!-- Reusable column-list fragment. Entries are database column names, so they must match
     the column="..." values declared in BaseResultMap (association_type), not the Java
     property names (associationType). -->
<sql id="Base_Column_List">
id,ter_id,type,
id,ter_id,association_type,
attribute,value,data_type,
create_time,update_time
</sql>

Loading…
Cancel
Save