You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
85 lines
2.8 KiB
Java
85 lines
2.8 KiB
Java
5 months ago
|
package com.supervision.pdfqaserver.service.impl;
|
||
|
|
||
|
import cn.hutool.core.collection.CollUtil;
|
||
|
import com.supervision.pdfqaserver.dto.EREDTO;
|
||
|
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
|
||
|
import com.supervision.pdfqaserver.dto.DocumentDTO;
|
||
|
import com.supervision.pdfqaserver.dto.TruncateDTO;
|
||
|
import com.supervision.pdfqaserver.service.*;
|
||
|
import lombok.RequiredArgsConstructor;
|
||
|
import lombok.extern.slf4j.Slf4j;
|
||
|
import org.springframework.stereotype.Service;
|
||
|
|
||
|
import java.util.ArrayList;
|
||
|
import java.util.List;
|
||
|
|
||
|
@Slf4j
|
||
|
@Service
|
||
|
@RequiredArgsConstructor
|
||
|
public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
|
||
|
|
||
|
private final TripleConversionPipeline tripleConversionPipeline;
|
||
|
|
||
|
private final TripleToCypherExecutor tripleToCypherExecutor;
|
||
|
|
||
|
private final ChineseEnglishWordsService chineseEnglishWordsService;
|
||
|
|
||
|
private final DocumentTruncationService documentTruncationService;
|
||
|
|
||
|
private final DomainMetadataService domainMetadataService;
|
||
|
|
||
|
private final PdfAnalysisOutputService pdfAnalysisOutputService;
|
||
|
|
||
|
private final PdfInfoService pdfInfoService;
|
||
|
|
||
|
private final TruncationEntityExtractionService truncationEntityExtractionService;
|
||
|
|
||
|
private final TruncationRelationExtractionService relationExtractionService;
|
||
|
|
||
|
private final TruncationErAttributeService truncationErAttributeService;
|
||
|
|
||
|
@Override
|
||
|
public void generateGraph(String documentId) {
|
||
|
List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(documentId);
|
||
|
if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
|
||
|
log.info("没有找到pdfId为{}的pdf分析结果", documentId);
|
||
|
return;
|
||
|
}
|
||
|
List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).toList();
|
||
|
// 对文档进行切分
|
||
|
List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
|
||
|
// 保存分片信息
|
||
|
documentTruncationService.batchSave(truncateDTOS);
|
||
|
|
||
|
// 对切分后的文档进行命名实体识别
|
||
|
List<EREDTO> eredtoList = new ArrayList<>();
|
||
|
for (TruncateDTO truncateDTO : truncateDTOS) {
|
||
|
EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO);
|
||
|
// 保存实体关系抽取结果
|
||
|
this.saveERE(eredto, truncateDTO.getId());
|
||
|
}
|
||
|
|
||
|
// 合并实体关系抽取结果
|
||
|
List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
|
||
|
|
||
|
for (EREDTO eredto : mergedList) {
|
||
|
String insertCypher = tripleToCypherExecutor.generateInsertCypher(eredto);
|
||
|
|
||
|
tripleToCypherExecutor.executeCypher(insertCypher);
|
||
|
}
|
||
|
|
||
|
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public void queryGraph(String databaseId, String query) {
|
||
|
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public void saveERE(EREDTO eredto, String truncationId) {
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|