// NOTE: the lines originally here were repository web-viewer residue (a topic-selection hint,
// "142 lines", "5.3 KiB", "Java", "5 months ago") and were not part of the Java source.
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
// (web-viewer blame timestamp: 5 months ago)
import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.dto.*;
// (web-viewer blame timestamp: 5 months ago)
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import com.supervision.pdfqaserver.service.*;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
@Slf4j
@Service
@RequiredArgsConstructor
public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
private final TripleConversionPipeline tripleConversionPipeline;
private final TripleToCypherExecutor tripleToCypherExecutor;
private final ChineseEnglishWordsService chineseEnglishWordsService;
private final DocumentTruncationService documentTruncationService;
private final DomainMetadataService domainMetadataService;
private final PdfAnalysisOutputService pdfAnalysisOutputService;
private final TruncationEntityExtractionService truncationEntityExtractionService;
private final TruncationRelationExtractionService relationExtractionService;
5 months ago
private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
5 months ago
@Override
public void generateGraph(String documentId) {
List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(documentId);
if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
log.info("没有找到pdfId为{}的pdf分析结果", documentId);
return;
}
List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).toList();
// 对文档进行切分
List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
// 保存分片信息
documentTruncationService.batchSave(truncateDTOS);
// 对切分后的文档进行命名实体识别
List<EREDTO> eredtoList = new ArrayList<>();
for (TruncateDTO truncateDTO : truncateDTOS) {
EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO);
5 months ago
if (null == eredto){
continue;
}
5 months ago
// 保存实体关系抽取结果
this.saveERE(eredto, truncateDTO.getId());
}
// 合并实体关系抽取结果
List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
5 months ago
// 保存领域元数据
5 months ago
for (EREDTO eredto : mergedList) {
5 months ago
List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isEmpty(relations)){
continue;
}
for (RelationExtractionDTO relation : relations) {
DomainMetadata domainMetadata = relation.toDomainMetadata();
domainMetadata.setDomainType("1");
domainMetadata.setGenerationType(DomainMetaGenerationEnum.SYSTEM_AUTO_GENERATION.getCode());
domainMetadataService.saveIfNotExists(domainMetadata);
}
}
5 months ago
5 months ago
// 保存字典
List<ChineseEnglishWords> allWords = chineseEnglishWordsService.queryAll();
for (EREDTO eredto : mergedList) {
List<EntityExtractionDTO> entities = eredto.getEntities();
if (CollUtil.isNotEmpty(entities)){
for (EntityExtractionDTO entityDTO : entities) {
saveWordsIfNecessary(entityDTO.getEntity(), allWords);
}
}
List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isNotEmpty(relations)){
for (RelationExtractionDTO relationDTO : relations) {
saveWordsIfNecessary(relationDTO.getRelation(), allWords);
}
}
}
// 生成cypher语句
for (EREDTO eredto : mergedList) {
eredto.setEn(allWords);
String insertCypher = tripleToCypherExecutor.generateInsertCypher(eredto);
log.info("insertCypher:{}", insertCypher);
5 months ago
tripleToCypherExecutor.executeCypher(insertCypher);
}
5 months ago
}
5 months ago
5 months ago
private void saveWordsIfNecessary(String word, List<ChineseEnglishWords> allWords) {
boolean exists = chineseEnglishWordsService.wordsExists(word, allWords);
if (exists){
return;
}
String generate = chinesEsToEnglishGenerator.generate(word);
if (StrUtil.isEmpty(generate)){
log.info("生成英文名称失败entity:{}", word);
return;
}
ChineseEnglishWords words = new ChineseEnglishWords();
words.setChineseWord(word);
words.setEnglishWord(generate);
chineseEnglishWordsService.saveIfNotExists(words);
allWords.add(words);// 更新缓存
5 months ago
}
@Override
public void queryGraph(String databaseId, String query) {
}
@Override
public void saveERE(EREDTO eredto, String truncationId) {
5 months ago
// 保存实体信息
truncationEntityExtractionService.saveERE(eredto.getEntities());
// 保存关系
relationExtractionService.saveERE(eredto.getRelations());
5 months ago
}
}