添加实体统一功能(初版)

v_0.0.2
xueqingkun 3 months ago
parent e181c00c40
commit fe38922324

@ -887,7 +887,7 @@ public class PromptCache {
{query} {query}
``` ```
neo4j_schemaJSON neo4j_schemaJSON
```shema ```schema
{schema} {schema}
``` ```
# #

@ -0,0 +1,58 @@
package com.supervision.pdfqaserver.domain;
import com.baomidou.mybatisplus.annotation.FieldFill;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import java.io.Serializable;
import java.time.LocalDateTime;
import lombok.Data;
/**
 * Persistent entity mapped to the {@code keyword_synonym} table.
 * <p>
 * {@code KeywordSynonymServiceImpl#listAllSynonyms()} reads every row and treats it as
 * one (standard term, synonym) pair, grouping rows that share the same standard term.
 *
 * @TableName keyword_synonym
 */
@TableName(value ="keyword_synonym")
@Data
public class KeywordSynonym implements Serializable {
/**
 * Primary key of the row.
 */
@TableId
private String id;
/**
 * Standard (canonical) term that is returned in place of the synonym.
 */
private String standardTerm;
/**
 * Synonym of {@link #standardTerm}. Despite the plural name, the service layer
 * compares and stores this value as one single term (it is never split).
 */
private String synonyms;
/**
 * Frequency value handed to the segmentation dictionary together with the term.
 * NOTE(review): presumably the word-frequency weight used by the segmenter - confirm.
 */
private Integer frequency;
/**
 * Nature value handed to the segmentation dictionary together with the term.
 * NOTE(review): presumably a part-of-speech / category tag - confirm.
 */
private String nature;
/**
 * Creation timestamp; flagged for MyBatis-Plus auto-fill on insert.
 * NOTE(review): the actual value comes from a MetaObjectHandler that is not visible here.
 */
@TableField(fill = FieldFill.INSERT)
private LocalDateTime createTime;
/**
 * Last-modification timestamp; flagged for MyBatis-Plus auto-fill on insert and update.
 * NOTE(review): the actual value comes from a MetaObjectHandler that is not visible here.
 */
@TableField(fill = FieldFill.INSERT_UPDATE)
private LocalDateTime updateTime;
// Serialization version id; exist = false tells MyBatis-Plus it is not a table column.
@TableField(exist = false)
private static final long serialVersionUID = 1L;
}

@ -0,0 +1,26 @@
package com.supervision.pdfqaserver.dto;
import lombok.Data;
import java.util.ArrayList;
import java.util.List;
/**
 * In-memory view of a synonym group.
 * <p>
 * A top-level instance carries a standard term in {@link #term} and its synonyms in
 * {@link #synonyms}; each nested instance carries one synonym in {@link #term}.
 */
@Data
public class KeywordSynonymDTO {
/**
 * The term itself: the standard term for a top-level entry, a synonym for a nested entry.
 */
private String term;
/**
 * Frequency value passed to the segmentation dictionary along with {@link #term}.
 */
private Integer frequency;
/**
 * Nature value passed to the segmentation dictionary along with {@link #term}.
 */
private String nature;
// Nested synonym entries; starts as an empty mutable list so callers can append directly.
private List<KeywordSynonymDTO> synonyms = new ArrayList<>();
}

@ -0,0 +1,18 @@
package com.supervision.pdfqaserver.mapper;
import com.supervision.pdfqaserver.domain.KeywordSynonym;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
/**
 * MyBatis-Plus mapper for the {@code keyword_synonym} table.
 * Declares no methods of its own; everything is inherited from {@link BaseMapper}.
 *
 * @author Administrator
 * @description Mapper for database operations on table keyword_synonym
 * @createDate 2025-06-23 16:49:09
 * @Entity com.supervision.pdfqaserver.domain.KeywordSynonym
 */
public interface KeywordSynonymMapper extends BaseMapper<KeywordSynonym> {
}

@ -0,0 +1,19 @@
package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.KeywordSynonym;
import com.baomidou.mybatisplus.extension.service.IService;
import com.supervision.pdfqaserver.dto.KeywordSynonymDTO;
import java.util.List;
/**
 * Service for the {@code keyword_synonym} table: loads synonym groups and resolves a
 * term to its standard form.
 *
 * @author Administrator
 * @description Service for database operations on table keyword_synonym
 * @createDate 2025-06-23 16:49:09
 */
public interface KeywordSynonymService extends IService<KeywordSynonym> {
/**
 * Loads all synonym rows and groups them by standard term.
 *
 * @return one entry per standard term, each holding its synonyms as nested entries;
 *         the visible implementation returns an empty list when the table has no rows
 */
List<KeywordSynonymDTO> listAllSynonyms();
/**
 * Resolves a term to its standard term using the supplied synonym groups.
 *
 * @param term     the term to look up
 * @param synonyms synonym groups to search, as produced by {@link #listAllSynonyms()}
 * @return the standard term when {@code term} equals a group's standard term or one of
 *         its synonyms; {@code null} when nothing matches or an argument is empty
 */
String getStandardTerm(String term,List<KeywordSynonymDTO> synonyms);
}

@ -14,4 +14,12 @@ public interface Retriever {
* @return * @return
*/ */
List<Map<String, Object>> retrieval(String query); List<Map<String, Object>> retrieval(String query);
/**
 * Rewrites the query text and returns the rewritten form.
 * In the DataCompareRetriever implementation the query is segmented into terms and each
 * term that has a known standard term is replaced by it.
 *
 * @param query the original query text
 * @return the rewritten query text
 */
String rewriteQuery(String query);
} }

@ -51,16 +51,6 @@ public class ChatServiceImpl implements ChatService {
public Flux<String> knowledgeQA(String userQuery) { public Flux<String> knowledgeQA(String userQuery) {
log.info("用户查询: {}", userQuery); log.info("用户查询: {}", userQuery);
// 生成cypher语句
/*String cypher = tripleToCypherExecutor.generateQueryCypher(userQuery,null);
log.info("生成CYPHER语句的消息{}", cypher);
if (StrUtil.isEmpty(cypher)){
return Flux.just("查无结果").concatWith(Flux.just("[END]"));
}
// 执行cypher语句
List<Map<String, Object>> graphResult = tripleToCypherExecutor.executeCypher(cypher);
*/
List<Map<String, Object>> graphResult = compareRetriever.retrieval(userQuery); List<Map<String, Object>> graphResult = compareRetriever.retrieval(userQuery);
if (CollUtil.isEmpty(graphResult)){ if (CollUtil.isEmpty(graphResult)){
return Flux.just("查无结果").concatWith(Flux.just("[END]")); return Flux.just("查无结果").concatWith(Flux.just("[END]"));

@ -2,20 +2,23 @@ package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.DateUtil; import cn.hutool.core.date.DateUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONArray; import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONUtil; import cn.hutool.json.JSONUtil;
import com.supervision.pdfqaserver.cache.PromptCache; import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.dto.CypherSchemaDTO; import com.supervision.pdfqaserver.dto.CypherSchemaDTO;
import com.supervision.pdfqaserver.dto.neo4j.RelationshipValueDTO; import com.supervision.pdfqaserver.dto.KeywordSynonymDTO;
import com.supervision.pdfqaserver.dto.TextTerm;
import com.supervision.pdfqaserver.service.*; import com.supervision.pdfqaserver.service.*;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
import static com.supervision.pdfqaserver.cache.PromptCache.*; import static com.supervision.pdfqaserver.cache.PromptCache.*;
/** /**
@ -28,8 +31,13 @@ public class DataCompareRetriever implements Retriever {
private final TripleToCypherExecutor tripleToCypherExecutor; private final TripleToCypherExecutor tripleToCypherExecutor;
private final AiCallService aiCallService; private final AiCallService aiCallService;
private final KeywordSynonymService keywordSynonymService;
private final TextToSegmentService textToSegmentService;
private List<KeywordSynonymDTO> synonyms;
@Override @Override
public List<Map<String, Object>> retrieval(String query) { public List<Map<String, Object>> retrieval(String query) {
log.info("retrieval: 执行数据对比检索器,查询内容:{}", query); log.info("retrieval: 执行数据对比检索器,查询内容:{}", query);
@ -88,4 +96,34 @@ public class DataCompareRetriever implements Retriever {
return result; return result;
} }
/**
 * Rewrites a query by normalising its terms to their standard form.
 * <p>
 * The query is segmented into terms; every term for which the synonym service knows a
 * standard term is replaced by it, all others are kept as-is. The pieces are then
 * concatenated without any separator.
 *
 * @param query the query text; must not be empty
 * @return the concatenation of the (possibly replaced) segmented terms
 */
@Override
public String rewriteQuery(String query) {
    Assert.notEmpty(query, "查询内容不能为空");
    StringBuilder rewritten = new StringBuilder();
    for (TextTerm segment : textToSegmentService.segmentText(query)) {
        String standardTerm = keywordSynonymService.getStandardTerm(segment.getWord(), synonyms);
        if (standardTerm == null) {
            // No synonym mapping known for this segment: keep the original word.
            rewritten.append(segment.getWord());
        } else {
            rewritten.append(standardTerm);
        }
    }
    return rewritten.toString();
}
/**
 * Loads all synonym groups once after construction, caches them in {@code synonyms}
 * and registers every standard term and every synonym with the segmentation dictionary.
 * <p>
 * Entries without a usable term are skipped: a {@code keyword_synonym} row whose synonym
 * column is empty would otherwise reach {@code addDict} with a null/blank word.
 */
@PostConstruct
public void init() {
    log.info("DataCompareRetriever initialized");
    // Initialise the synonym data used later by rewriteQuery.
    synonyms = keywordSynonymService.listAllSynonyms();
    if (CollUtil.isEmpty(synonyms)) {
        log.warn("DataCompareRetriever: 未找到任何同义词,不添加字典数据...");
        return;
    }
    for (KeywordSynonymDTO group : synonyms) {
        registerDictTerm(group);
        if (group != null && CollUtil.isNotEmpty(group.getSynonyms())) {
            for (KeywordSynonymDTO member : group.getSynonyms()) {
                registerDictTerm(member);
            }
        }
    }
}

/**
 * Adds one entry to the segmentation dictionary, ignoring entries that carry no term.
 *
 * @param entry the standard-term or synonym entry to register; may be {@code null}
 */
private void registerDictTerm(KeywordSynonymDTO entry) {
    if (entry == null || StrUtil.isBlank(entry.getTerm())) {
        return;
    }
    textToSegmentService.addDict(entry.getTerm(), entry.getNature(), entry.getFrequency());
}
} }

@ -0,0 +1,87 @@
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.KeywordSynonym;
import com.supervision.pdfqaserver.dto.KeywordSynonymDTO;
import com.supervision.pdfqaserver.mapper.KeywordSynonymMapper;
import com.supervision.pdfqaserver.service.KeywordSynonymService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
/**
 * Service implementation for the {@code keyword_synonym} table: loads synonym rows,
 * groups them by standard term and resolves terms to their standard form.
 *
 * @author Administrator
 * @description Service implementation for database operations on table keyword_synonym
 * @createDate 2025-06-23 16:49:09
 */
@Slf4j
@Service
public class KeywordSynonymServiceImpl extends ServiceImpl<KeywordSynonymMapper, KeywordSynonym>
        implements KeywordSynonymService {

    /**
     * Loads every row of the table and groups the rows by standard term.
     * <p>
     * The first row seen for a standard term supplies the group's frequency and nature.
     * Rows with a blank standard term are ignored. Rows with a blank synonym still
     * register their standard term but add no synonym entry, so no entry with a
     * null/blank term is ever produced. Duplicate synonyms within a group are logged
     * and dropped.
     *
     * @return a mutable list with one entry per standard term, in first-seen order;
     *         empty when the table has no usable rows
     */
    @Override
    public List<KeywordSynonymDTO> listAllSynonyms() {
        List<KeywordSynonym> rows = super.lambdaQuery().list();
        if (CollUtil.isEmpty(rows)) {
            return new ArrayList<>();
        }
        // Insertion-ordered index (standard term -> group): one hash lookup per row
        // instead of re-scanning the result list, while keeping first-seen order.
        Map<String, KeywordSynonymDTO> groupsByTerm = new LinkedHashMap<>();
        for (KeywordSynonym row : rows) {
            String standardTerm = row.getStandardTerm();
            if (StrUtil.isBlank(standardTerm)) {
                continue;
            }
            KeywordSynonymDTO group =
                    groupsByTerm.computeIfAbsent(standardTerm, term -> toDto(term, row));

            String synonymTerm = row.getSynonyms();
            if (StrUtil.isBlank(synonymTerm)) {
                // Nothing to attach; the standard term itself is already registered.
                continue;
            }
            List<KeywordSynonymDTO> members = group.getSynonyms();
            boolean alreadyPresent = members.stream()
                    .anyMatch(member -> StrUtil.equals(member.getTerm(), synonymTerm));
            if (alreadyPresent) {
                log.warn("listAllSynonyms: 同义词重复,词语:{},同义词:{}", standardTerm, synonymTerm);
                continue;
            }
            members.add(toDto(synonymTerm, row));
        }
        return new ArrayList<>(groupsByTerm.values());
    }

    /**
     * Resolves a term to its standard term.
     * <p>
     * Groups are searched in list order; the first group whose standard term or one of
     * whose synonyms equals {@code term} wins.
     *
     * @param term     the term to look up
     * @param synonyms the synonym groups to search
     * @return the matching group's standard term, or {@code null} when {@code term} or
     *         {@code synonyms} is empty or nothing matches
     */
    @Override
    public String getStandardTerm(String term, List<KeywordSynonymDTO> synonyms) {
        if (StrUtil.isEmpty(term) || CollUtil.isEmpty(synonyms)) {
            return null;
        }
        for (KeywordSynonymDTO group : synonyms) {
            if (group == null) {
                continue;
            }
            if (StrUtil.equals(group.getTerm(), term)) {
                return group.getTerm();
            }
            // The nested list can be null if a caller replaced it through the setter.
            List<KeywordSynonymDTO> members = group.getSynonyms();
            if (CollUtil.isEmpty(members)) {
                continue;
            }
            for (KeywordSynonymDTO member : members) {
                if (member != null && StrUtil.equals(member.getTerm(), term)) {
                    return group.getTerm();
                }
            }
        }
        return null;
    }

    /**
     * Builds a DTO for the given term, copying frequency and nature from the row.
     */
    private static KeywordSynonymDTO toDto(String term, KeywordSynonym row) {
        KeywordSynonymDTO dto = new KeywordSynonymDTO();
        dto.setTerm(term);
        dto.setFrequency(row.getFrequency());
        dto.setNature(row.getNature());
        return dto;
    }
}

@ -40,6 +40,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
private final DomainMetadataService domainMetadataService; private final DomainMetadataService domainMetadataService;
private final KeywordSynonymService keywordSynonymService;
@Override @Override
public DocumentContentTypeEnum makeOutPdfContentType(Integer pdfId) { public DocumentContentTypeEnum makeOutPdfContentType(Integer pdfId) {
Assert.notNull(pdfId, "pdfId不能为空"); Assert.notNull(pdfId, "pdfId不能为空");
@ -588,9 +590,22 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
merged = eredtoList.stream().filter(ere-> merged = eredtoList.stream().filter(ere->
ere.getEntities().stream().anyMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList()); ere.getEntities().stream().anyMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
List<KeywordSynonymDTO> synonymDTOS = keywordSynonymService.listAllSynonyms();
// 把剩下的数据进行合并计算 // 把剩下的数据进行合并计算
eredtoList = eredtoList.stream().filter(ere-> eredtoList = eredtoList.stream().filter(ere->
ere.getEntities().stream().noneMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList()); ere.getEntities().stream().noneMatch(e->StrUtil.equals(e.getEntity(),"表")))
.peek(ere->{
// 对实体名称进行同义词转换
List<EntityExtractionDTO> entities = ere.getEntities();
for (EntityExtractionDTO entity : entities) {
String name = entity.getName();
String standardTerm = keywordSynonymService.getStandardTerm(name, synonymDTOS);
if (StrUtil.isNotEmpty(standardTerm)) {
entity.setName(standardTerm);
}
}
})
.collect(Collectors.toList());
Map<String, EntityExtractionDTO> entityMap = new HashMap<>(); Map<String, EntityExtractionDTO> entityMap = new HashMap<>();
Map<String, RelationExtractionDTO> relationMap = new HashMap<>(); Map<String, RelationExtractionDTO> relationMap = new HashMap<>();
for (EREDTO eredto : eredtoList) { for (EREDTO eredto : eredtoList) {

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.supervision.pdfqaserver.mapper.KeywordSynonymMapper">
<!-- No statements are declared here; this file only provides the result map and the
     column-list fragment below. -->
<!-- Maps keyword_synonym columns (snake_case) onto KeywordSynonym properties (camelCase). -->
<resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.KeywordSynonym">
<id property="id" column="id" jdbcType="VARCHAR"/>
<result property="standardTerm" column="standard_term" jdbcType="VARCHAR"/>
<result property="synonyms" column="synonyms" jdbcType="VARCHAR"/>
<result property="frequency" column="frequency" jdbcType="INTEGER"/>
<result property="nature" column="nature" jdbcType="VARCHAR"/>
<result property="createTime" column="create_time" jdbcType="TIMESTAMP"/>
<result property="updateTime" column="update_time" jdbcType="TIMESTAMP"/>
</resultMap>
<!-- Reusable list of all table columns; not referenced by any statement in this file. -->
<sql id="Base_Column_List">
id,standard_term,synonyms,
frequency,nature,create_time,
update_time
</sql>
</mapper>

@ -352,8 +352,16 @@ class PdfQaServerApplicationTests {
TimeInterval timer = new TimeInterval(); TimeInterval timer = new TimeInterval();
textToSegmentService.addDict("龙源电力集团","企业",1000); textToSegmentService.addDict("龙源电力集团","企业",1000);
List<Map<String, Object>> retrieval = retriever.retrieval("龙源电力集团近三年营收情况是多少"); List<Map<String, Object>> retrieval = retriever.retrieval("龙源电力公司近三年营收情况是多少");
System.out.println(JSONUtil.toJsonStr(retrieval)); System.out.println(JSONUtil.toJsonStr(retrieval));
log.info("<<<===========================>>> 耗时: {} 毫秒", timer.intervalMs()); log.info("<<<===========================>>> 耗时: {} 毫秒", timer.intervalMs());
} }
/**
 * Smoke test for query rewriting: runs a sample question through
 * {@code retriever.rewriteQuery} and logs the text before and after.
 * It makes no assertions; the output has to be inspected manually.
 */
@Test
public void rewriteQueryTest() {
    final String originalQuery = "龙源电力近三年营收情况是多少?";
    final String rewrittenQuery = retriever.rewriteQuery(originalQuery);
    log.info("重写前:{} \n 重写后的结果: {}", originalQuery, rewrittenQuery);
}
} }

Loading…
Cancel
Save