|
|
@ -26,6 +26,7 @@ import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
import java.util.stream.Stream;
|
|
|
|
import static com.supervision.pdfqaserver.cache.PromptCache.*;
|
|
|
|
import static com.supervision.pdfqaserver.cache.PromptCache.*;
|
|
|
|
|
|
|
|
|
|
|
|
@Slf4j
|
|
|
|
@Slf4j
|
|
|
@ -272,15 +273,16 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
|
|
|
|
log.info("queryRelationSchema: 分词结果:{}", terms);
|
|
|
|
log.info("queryRelationSchema: 分词结果:{}", terms);
|
|
|
|
log.info("queryRelationSchema: 开始进行文本标签向量匹配...");
|
|
|
|
log.info("queryRelationSchema: 开始进行文本标签向量匹配...");
|
|
|
|
List<NodeRelationVector> matchedText = new ArrayList<>();
|
|
|
|
List<NodeRelationVector> matchedText = new ArrayList<>();
|
|
|
|
|
|
|
|
List<String> keywords = mergeNodeAndRelationLabel();
|
|
|
|
for (TextTerm term : terms) {
|
|
|
|
for (TextTerm term : terms) {
|
|
|
|
if (StrUtil.isEmpty(term.getLabelValue())){
|
|
|
|
if (StrUtil.isEmpty(term.getLabelValue(keywords))){
|
|
|
|
log.info("queryRelationSchema: 分词结果`{}`不是关键标签,跳过...", term.getWord());
|
|
|
|
log.info("queryRelationSchema: 分词结果`{}`不是关键标签,跳过...", term.getWord());
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Embedding embedding = aiCallService.embedding(term.getLabelValue());
|
|
|
|
Embedding embedding = aiCallService.embedding(term.getLabelValue(keywords));
|
|
|
|
term.setEmbedding(embedding.getOutput());
|
|
|
|
term.setEmbedding(embedding.getOutput());
|
|
|
|
List<NodeRelationVector> textVectorDTOS = nodeRelationVectorService.matchSimilarByCosine(embedding.getOutput(), 0.9, List.of("N","R"),3); // 继续过滤
|
|
|
|
List<NodeRelationVector> textVectorDTOS = nodeRelationVectorService.matchSimilarByCosine(embedding.getOutput(), 0.9, List.of("N","R"),3); // 继续过滤
|
|
|
|
log.info("retrieval: 文本:{}匹配到的文本向量:{}", term.getWord() ,textVectorDTOS.stream().map(NodeRelationVector::getContent).collect(Collectors.joining(" ")));
|
|
|
|
log.info("retrieval: 文本:`{}`匹配到的文本向量:`{}`", term.getWord() ,textVectorDTOS.stream().map(NodeRelationVector::getContent).collect(Collectors.joining(" ")));
|
|
|
|
matchedText.addAll(textVectorDTOS);
|
|
|
|
matchedText.addAll(textVectorDTOS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (CollUtil.isEmpty(matchedText)){
|
|
|
|
if (CollUtil.isEmpty(matchedText)){
|
|
|
@ -306,7 +308,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
|
|
|
|
// 对查询到的关系进行重排序
|
|
|
|
// 对查询到的关系进行重排序
|
|
|
|
List<Pair<Double, RelationExtractionDTO>> pairs = new ArrayList<>();
|
|
|
|
List<Pair<Double, RelationExtractionDTO>> pairs = new ArrayList<>();
|
|
|
|
TimeInterval timeInterval = new TimeInterval();
|
|
|
|
TimeInterval timeInterval = new TimeInterval();
|
|
|
|
String join = terms.stream().map(TextTerm::getLabelValue).filter(StrUtil::isNotEmpty).collect(Collectors.joining());
|
|
|
|
String join = terms.stream().map(t->t.getLabelValue(keywords)).filter(StrUtil::isNotEmpty).collect(Collectors.joining());
|
|
|
|
Embedding embedding = aiCallService.embedding(join);
|
|
|
|
Embedding embedding = aiCallService.embedding(join);
|
|
|
|
for (RelationExtractionDTO relation : merged) {
|
|
|
|
for (RelationExtractionDTO relation : merged) {
|
|
|
|
String content = relation.getSourceType() + " " + relation.getRelation() + " " + relation.getTargetType();
|
|
|
|
String content = relation.getSourceType() + " " + relation.getRelation() + " " + relation.getTargetType();
|
|
|
@ -319,7 +321,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
log.info("queryRelationSchema: 关系排序耗时:{}ms", timeInterval.intervalMs());
|
|
|
|
log.info("queryRelationSchema: 关系排序耗时:{}ms", timeInterval.intervalMs());
|
|
|
|
|
|
|
|
|
|
|
|
merged = pairs.stream().sorted((p1, p2) -> Double.compare(p2.getKey(), p1.getKey())).limit(5).map(Pair::getValue).toList();
|
|
|
|
merged = pairs.stream().sorted((p1, p2) -> Double.compare(p2.getKey(), p1.getKey())).limit(4).map(Pair::getValue).toList();
|
|
|
|
List<EntityExtractionDTO> entityExtractionDTOS = new ArrayList<>();
|
|
|
|
List<EntityExtractionDTO> entityExtractionDTOS = new ArrayList<>();
|
|
|
|
for (RelationExtractionDTO relationExtractionDTO : merged) {
|
|
|
|
for (RelationExtractionDTO relationExtractionDTO : merged) {
|
|
|
|
EntityExtractionDTO sourceNode = cypherSchemaDTO.getNode(relationExtractionDTO.getSourceType());
|
|
|
|
EntityExtractionDTO sourceNode = cypherSchemaDTO.getNode(relationExtractionDTO.getSourceType());
|
|
|
@ -392,4 +394,14 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private List<String> mergeNodeAndRelationLabel() {
|
|
|
|
|
|
|
|
loadCypherSchemaIfAbsent();
|
|
|
|
|
|
|
|
if (CollUtil.isEmpty(cypherSchemaDTO.getRelations())) {
|
|
|
|
|
|
|
|
log.warn("图谱schema数据为空,无法合并节点和关系标签");
|
|
|
|
|
|
|
|
return new ArrayList<>();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return cypherSchemaDTO.getRelations().stream()
|
|
|
|
|
|
|
|
.flatMap(r -> Stream.of(r.getSourceType(), r.getRelation(), r.getTargetType())).distinct().collect(Collectors.toList());
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|