diff --git a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java
index dc28f87..70500b6 100644
--- a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java
+++ b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java
@@ -887,7 +887,7 @@ public class PromptCache {
 {query}
 ```
 neo4j_schema以JSON格式定义如下:
-```shema
+```schema
 {schema}
 ```
 # 环境变量
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/KeywordSynonym.java b/src/main/java/com/supervision/pdfqaserver/domain/KeywordSynonym.java
new file mode 100644
index 0000000..3246f7b
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/KeywordSynonym.java
@@ -0,0 +1,58 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.FieldFill;
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableId;
+import com.baomidou.mybatisplus.annotation.TableName;
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Keyword synonym table
+ * @TableName keyword_synonym
+ */
+@TableName(value ="keyword_synonym")
+@Data
+public class KeywordSynonym implements Serializable {
+    /**
+     * Primary key
+     */
+    @TableId
+    private String id;
+
+    /**
+     * Standard term
+     */
+    private String standardTerm;
+
+    /**
+     * Synonym
+     */
+    private String synonyms;
+
+    /**
+     * Word frequency; the larger the value, the higher the priority
+     */
+    private Integer frequency;
+
+    /**
+     * Part of speech
+     */
+    private String nature;
+
+    /**
+     * Creation time
+     */
+    @TableField(fill = FieldFill.INSERT)
+    private LocalDateTime createTime;
+
+    /**
+     * Update time
+     */
+    @TableField(fill = FieldFill.INSERT_UPDATE)
+    private LocalDateTime updateTime;
+
+    @TableField(exist = false)
+    private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/KeywordSynonymDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/KeywordSynonymDTO.java
new file mode 100644
index 0000000..6b9d9b2
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/dto/KeywordSynonymDTO.java
@@ -0,0 +1,29 @@
+package com.supervision.pdfqaserver.dto;
+
+import lombok.Data;
+import java.util.ArrayList;
+import java.util.List;
+
+@Data
+public class KeywordSynonymDTO {
+
+    /**
+     * Word
+     */
+    private String term;
+
+    /**
+     * Word frequency
+     */
+    private Integer frequency;
+
+    /**
+     * Part of speech
+     */
+    private String nature;
+
+    /**
+     * Synonyms that resolve to this term; starts out as an empty list
+     */
+    private List<KeywordSynonymDTO> synonyms = new ArrayList<>();
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/KeywordSynonymMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/KeywordSynonymMapper.java
new file mode 100644
index 0000000..018c07b
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/KeywordSynonymMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.KeywordSynonym;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+* @author Administrator
+* @description Database access mapper for table keyword_synonym (keyword synonym table)
+* @createDate 2025-06-23 16:49:09
+* @Entity com.supervision.pdfqaserver.domain.KeywordSynonym
+*/
+public interface KeywordSynonymMapper extends BaseMapper<KeywordSynonym> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/KeywordSynonymService.java b/src/main/java/com/supervision/pdfqaserver/service/KeywordSynonymService.java
new file mode 100644
index 0000000..803fab9
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/KeywordSynonymService.java
@@ -0,0 +1,30 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.KeywordSynonym;
+import com.baomidou.mybatisplus.extension.service.IService;
+import com.supervision.pdfqaserver.dto.KeywordSynonymDTO;
+import java.util.List;
+
+/**
+* @author Administrator
+* @description Database service for table keyword_synonym (keyword synonym table)
+* @createDate 2025-06-23 16:49:09
+*/
+public interface KeywordSynonymService extends IService<KeywordSynonym> {
+
+    /**
+     * Loads every row of keyword_synonym grouped by standard term.
+     *
+     * @return one entry per standard term, each carrying its synonyms; never null
+     */
+    List<KeywordSynonymDTO> listAllSynonyms();
+
+    /**
+     * Resolves a word to its standard term.
+     *
+     * @param term     the word to look up
+     * @param synonyms the grouped synonym table, as returned by {@link #listAllSynonyms()}
+     * @return the standard term, or null when the word is unknown
+     */
+    String getStandardTerm(String term, List<KeywordSynonymDTO> synonyms);
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/Retriever.java b/src/main/java/com/supervision/pdfqaserver/service/Retriever.java
index 3e128e7..993489e 100644
--- a/src/main/java/com/supervision/pdfqaserver/service/Retriever.java
+++ b/src/main/java/com/supervision/pdfqaserver/service/Retriever.java
@@ -14,4 +14,12 @@ public interface Retriever {
      * @return 结果数据
      */
     List<Map<String, Object>> retrieval(String query);
+
+
+    /**
+     * Rewrites the query text
+     * @param query the original query text
+     * @return the rewritten query text
+     */
+    String rewriteQuery(String query);
 }
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/ChatServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/ChatServiceImpl.java
index 5b98413..ce21e46 100644
--- a/src/main/java/com/supervision/pdfqaserver/service/impl/ChatServiceImpl.java
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/ChatServiceImpl.java
@@ -51,16 +51,6 @@ public class ChatServiceImpl implements ChatService {
     public Flux<String> knowledgeQA(String userQuery) {
         log.info("用户查询: {}", userQuery);
 
-        // 生成cypher语句
-        /*String cypher = tripleToCypherExecutor.generateQueryCypher(userQuery,null);
-        log.info("生成CYPHER语句的消息:{}", cypher);
-        if (StrUtil.isEmpty(cypher)){
-            return Flux.just("查无结果").concatWith(Flux.just("[END]"));
-        }
-
-        // 执行cypher语句
-        List<Map<String, Object>> graphResult = tripleToCypherExecutor.executeCypher(cypher);
-        */
         List<Map<String, Object>> graphResult = compareRetriever.retrieval(userQuery);
         if (CollUtil.isEmpty(graphResult)){
             return Flux.just("查无结果").concatWith(Flux.just("[END]"));
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DataCompareRetriever.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DataCompareRetriever.java
index 099826b..69e0816 100644
--- a/src/main/java/com/supervision/pdfqaserver/service/impl/DataCompareRetriever.java
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DataCompareRetriever.java
@@ -2,20 +2,23 @@ package com.supervision.pdfqaserver.service.impl;
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.date.DateUtil;
+import cn.hutool.core.lang.Assert;
 import cn.hutool.core.util.StrUtil;
 import cn.hutool.json.JSONArray;
 import cn.hutool.json.JSONUtil;
 import com.supervision.pdfqaserver.cache.PromptCache;
 import com.supervision.pdfqaserver.dto.CypherSchemaDTO;
-import com.supervision.pdfqaserver.dto.neo4j.RelationshipValueDTO;
+import com.supervision.pdfqaserver.dto.KeywordSynonymDTO;
+import com.supervision.pdfqaserver.dto.TextTerm;
 import com.supervision.pdfqaserver.service.*;
+import jakarta.annotation.PostConstruct;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
-
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 import static com.supervision.pdfqaserver.cache.PromptCache.*;
 
 /**
@@ -28,8 +31,13 @@ public class DataCompareRetriever implements Retriever {
 
 
     private final TripleToCypherExecutor tripleToCypherExecutor;
-
     private final AiCallService aiCallService;
+
+    private final KeywordSynonymService keywordSynonymService;
+
+    private final TextToSegmentService textToSegmentService;
+
+    private List<KeywordSynonymDTO> synonyms;
     @Override
     public List<Map<String, Object>> retrieval(String query) {
         log.info("retrieval: 执行数据对比检索器,查询内容:{}", query);
@@ -88,4 +96,45 @@ public class DataCompareRetriever implements Retriever {
 
         return result;
     }
+
+    /**
+     * Rewrites a query by segmenting it and replacing every word that has a
+     * known synonym entry with its standard term.
+     *
+     * @param query original query text; must not be empty
+     * @return the segments joined back together without a separator
+     */
+    @Override
+    public String rewriteQuery(String query) {
+        Assert.notEmpty(query, "查询内容不能为空");
+        List<TextTerm> terms = textToSegmentService.segmentText(query);
+        return terms.stream().map(i -> {
+                    String standardTerm = keywordSynonymService.getStandardTerm(i.getWord(), synonyms);
+                    return standardTerm != null ? standardTerm : i.getWord();
+                })
+                .collect(Collectors.joining());
+    }
+
+    /**
+     * Loads the synonym table once after construction and registers every
+     * standard term and synonym with the segmenter dictionary.
+     */
+    @PostConstruct
+    public void init() {
+        log.info("DataCompareRetriever initialized");
+        // Load the synonym data
+        synonyms = keywordSynonymService.listAllSynonyms();
+        if (CollUtil.isNotEmpty(synonyms)) {
+            for (KeywordSynonymDTO synonym : synonyms) {
+                textToSegmentService.addDict(synonym.getTerm(), synonym.getNature(), synonym.getFrequency());
+                if (CollUtil.isNotEmpty(synonym.getSynonyms())) {
+                    for (KeywordSynonymDTO subSynonym : synonym.getSynonyms()) {
+                        textToSegmentService.addDict(subSynonym.getTerm(), subSynonym.getNature(), subSynonym.getFrequency());
+                    }
+                }
+            }
+        } else {
+            log.warn("DataCompareRetriever: 未找到任何同义词,不添加字典数据...");
+        }
+    }
 }
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KeywordSynonymServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KeywordSynonymServiceImpl.java
new file mode 100644
index 0000000..a99a626
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KeywordSynonymServiceImpl.java
@@ -0,0 +1,104 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.StrUtil;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.KeywordSynonym;
+import com.supervision.pdfqaserver.dto.KeywordSynonymDTO;
+import com.supervision.pdfqaserver.service.KeywordSynonymService;
+import com.supervision.pdfqaserver.mapper.KeywordSynonymMapper;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+* @author Administrator
+* @description Database service implementation for table keyword_synonym (keyword synonym table)
+* @createDate 2025-06-23 16:49:09
+*/
+@Slf4j
+@Service
+public class KeywordSynonymServiceImpl extends ServiceImpl<KeywordSynonymMapper, KeywordSynonym>
+    implements KeywordSynonymService{
+
+    /**
+     * Loads all rows and groups them by standard term, preserving first-seen order.
+     * Rows without a standard term are ignored; rows without a synonym only declare the term.
+     *
+     * @return one DTO per standard term with its distinct synonyms attached; never null
+     */
+    @Override
+    public List<KeywordSynonymDTO> listAllSynonyms() {
+        List<KeywordSynonym> rows = super.lambdaQuery().list();
+        if (CollUtil.isEmpty(rows)) {
+            return new ArrayList<>();
+        }
+        // Keyed by standard term so a row finds its group without rescanning the result list;
+        // LinkedHashMap keeps the groups in first-seen order, as the list-based version did.
+        Map<String, KeywordSynonymDTO> groups = new LinkedHashMap<>();
+        for (KeywordSynonym row : rows) {
+            String standardTerm = row.getStandardTerm();
+            if (StrUtil.isEmpty(standardTerm)) {
+                continue;
+            }
+            KeywordSynonymDTO group = groups.computeIfAbsent(standardTerm, term -> toDto(term, row));
+            String synonym = row.getSynonyms();
+            if (StrUtil.isEmpty(synonym)) {
+                // No synonym text: a null/empty word must not be handed on to dictionary registration.
+                continue;
+            }
+            boolean duplicate = group.getSynonyms().stream()
+                    .anyMatch(existing -> StrUtil.equals(existing.getTerm(), synonym));
+            if (duplicate) {
+                log.warn("listAllSynonyms: 同义词重复,词语:{},同义词:{}", standardTerm, synonym);
+                continue;
+            }
+            group.getSynonyms().add(toDto(synonym, row));
+        }
+        return new ArrayList<>(groups.values());
+    }
+
+    /**
+     * Resolves a word to its standard term.
+     *
+     * @param term     the word to look up
+     * @param synonyms grouped synonym table, as returned by {@link #listAllSynonyms()}
+     * @return the standard term when {@code term} is a standard term or one of its synonyms,
+     *         otherwise null
+     */
+    @Override
+    public String getStandardTerm(String term, List<KeywordSynonymDTO> synonyms) {
+        if (StrUtil.isEmpty(term) || CollUtil.isEmpty(synonyms)) {
+            return null;
+        }
+        for (KeywordSynonymDTO group : synonyms) {
+            if (StrUtil.equals(group.getTerm(), term)) {
+                return group.getTerm();
+            }
+            List<KeywordSynonymDTO> aliases = group.getSynonyms();
+            if (aliases == null) {
+                // The DTO exposes a setter, so the list is not guaranteed to be initialised.
+                continue;
+            }
+            for (KeywordSynonymDTO alias : aliases) {
+                if (StrUtil.equals(alias.getTerm(), term)) {
+                    return group.getTerm();
+                }
+            }
+        }
+        return null;
+    }
+
+    /** Builds a DTO for {@code term}, copying frequency and nature from the table row. */
+    private static KeywordSynonymDTO toDto(String term, KeywordSynonym row) {
+        KeywordSynonymDTO dto = new KeywordSynonymDTO();
+        dto.setTerm(term);
+        dto.setFrequency(row.getFrequency());
+        dto.setNature(row.getNature());
+        return dto;
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
index 1b0830c..c33eb42 100644
--- a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
@@ -40,6 +40,8 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
 
     private final DomainMetadataService domainMetadataService;
 
+    private final KeywordSynonymService keywordSynonymService;
+
     @Override
     public DocumentContentTypeEnum makeOutPdfContentType(Integer pdfId) {
         Assert.notNull(pdfId, "pdfId不能为空");
@@ -588,9 +590,20 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
         merged = eredtoList.stream().filter(ere->
                 ere.getEntities().stream().anyMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
 
+        List<KeywordSynonymDTO> synonymDTOS = keywordSynonymService.listAllSynonyms();
         // 把剩下的数据进行合并计算
         eredtoList = eredtoList.stream().filter(ere->
                 ere.getEntities().stream().noneMatch(e->StrUtil.equals(e.getEntity(),"表"))).collect(Collectors.toList());
+        // Replace entity names with their standard term. Done in an explicit loop rather than
+        // Stream.peek, which is meant for debugging and is not a reliable place for mutation.
+        for (EREDTO ere : eredtoList) {
+            for (EntityExtractionDTO entity : ere.getEntities()) {
+                String standardTerm = keywordSynonymService.getStandardTerm(entity.getName(), synonymDTOS);
+                if (StrUtil.isNotEmpty(standardTerm)) {
+                    entity.setName(standardTerm);
+                }
+            }
+        }
         Map entityMap = new HashMap<>();
         Map relationMap = new HashMap<>();
         for (EREDTO eredto : eredtoList) {
diff --git a/src/main/resources/mapper/KeywordSynonymMapper.xml b/src/main/resources/mapper/KeywordSynonymMapper.xml
new file mode 100644
index 0000000..50b7218
--- /dev/null
+++ b/src/main/resources/mapper/KeywordSynonymMapper.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper
+        PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
+        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.supervision.pdfqaserver.mapper.KeywordSynonymMapper">
+
+    <!-- NOTE(review): markup rebuilt from the KeywordSynonym fields and the column list below; confirm ids and add jdbcType attributes if the generator emitted them. -->
+    <resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.KeywordSynonym">
+            <id property="id" column="id"/>
+            <result property="standardTerm" column="standard_term"/>
+            <result property="synonyms" column="synonyms"/>
+            <result property="frequency" column="frequency"/>
+            <result property="nature" column="nature"/>
+            <result property="createTime" column="create_time"/>
+            <result property="updateTime" column="update_time"/>
+    </resultMap>
+
+    <sql id="Base_Column_List">
+        id,standard_term,synonyms,
+        frequency,nature,create_time,
+        update_time
+    </sql>
+</mapper>
diff --git a/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java b/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java
index 7fb3298..ded5e4d 100644
--- a/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java
+++ b/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java
@@ -352,8 +352,16 @@ class PdfQaServerApplicationTests {
 
         TimeInterval timer = new TimeInterval();
         textToSegmentService.addDict("龙源电力集团","企业",1000);
-        List<Map<String, Object>> retrieval = retriever.retrieval("龙源电力集团近三年营收情况是多少");
+        List<Map<String, Object>> retrieval = retriever.retrieval("龙源电力公司近三年营收情况是多少");
         System.out.println(JSONUtil.toJsonStr(retrieval));
         log.info("<<<===========================>>> 耗时: {} 毫秒", timer.intervalMs());
     }
+
+    @Test
+    public void rewriteQueryTest() {
+        // Exercise query rewriting on a segmented sentence
+        String text = "龙源电力近三年营收情况是多少?";
+        String rewriteQuery = retriever.rewriteQuery(text);
+        log.info("重写前:{} \n 重写后的结果: {}", text, rewriteQuery);
+    }
 }