package com.supervision.pdfqaserver.service.impl;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.supervision.pdfqaserver.domain.DocumentTruncation;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import com.supervision.pdfqaserver.service.*;
import com.supervision.pdfqaserver.thread.KnowledgeGraphGenerateTreadPool;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.aop.framework.AopContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;

/**
 * Builds the knowledge graph for a PDF document.
 *
 * <p>The pipeline implemented here is: reset previously stored data, slice the analysed
 * PDF output into truncations, run entity/relation extraction on each truncation, merge
 * the extraction results, persist domain metadata and the Chinese/English dictionary,
 * and finally hand each merged result to the {@link TripleToCypherExecutor}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {

    private final TripleConversionPipeline tripleConversionPipeline;

    private final TripleToCypherExecutor tripleToCypherExecutor;

    private final ChineseEnglishWordsService chineseEnglishWordsService;

    private final DocumentTruncationService documentTruncationService;

    private final DomainMetadataService domainMetadataService;

    private final PdfAnalysisOutputService pdfAnalysisOutputService;

    private final TruncationEntityExtractionService truncationEntityExtractionService;

    private final TruncationRelationExtractionService relationExtractionService;

    private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;

    private final PdfInfoService pdfInfoService;

    /**
     * Runs the full graph-generation pipeline for one document.
     *
     * <p>This method calls {@link AopContext#currentProxy()}, so it must be invoked through
     * the Spring proxy (with proxy exposure enabled) on the calling thread.
     *
     * @param documentId the document id; it is parsed with {@link Integer#valueOf(String)}
     *                   to look up the PDF analysis output
     * @throws NumberFormatException if {@code documentId} is not a valid integer
     * @throws IllegalStateException if no AOP proxy is bound to the current thread
     */
    @Override
    public void generateGraph(String documentId) {
        // Call through the proxy so that the @Transactional advice on resetGraphData is applied;
        // a plain this.resetGraphData(...) would bypass it.
        ((KnowledgeGraphService) AopContext.currentProxy()).resetGraphData(documentId);

        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(documentId));
        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
            log.info("没有找到pdfId为{}的pdf分析结果", documentId);
            return;
        }
        List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).toList();

        // Slice the documents into truncations.
        TimeInterval timer = new TimeInterval();
        timer.start("sliceDocuments");
        log.info("开始切分文档,初始文档个数:{}", documentDTOList.size());
        List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
        log.info("切分文档完成,切分后文档个数:{},耗时:{}秒", truncateDTOS.size(), timer.intervalSecond("sliceDocuments"));

        // Persist the truncation records.
        documentTruncationService.batchSave(truncateDTOS);

        // Run entity/relation extraction on every truncation.
        timer.start("doEre");
        log.info("开始命名实体识别...");
        List<EREDTO> eredtoList = extractAndSave(truncateDTOS);
        log.info("命名实体识别完成,耗时:{}秒", timer.intervalSecond("doEre"));

        // Merge the per-truncation extraction results.
        log.info("开始合并实体关系抽取结果...");
        List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
        log.info("合并实体关系抽取结果完成,合并后个数:{}", mergedList.size());

        saveDomainMetadata(mergedList);
        List<ChineseEnglishWords> allWords = saveDictionary(mergedList);
        writeGraph(mergedList, allWords);
    }

    /**
     * Extracts entities and relations from each truncation and persists every successful result.
     * A truncation whose extraction throws or returns {@code null} is logged/skipped so that one
     * bad slice does not abort the whole document.
     */
    private List<EREDTO> extractAndSave(List<TruncateDTO> truncateDTOS) {
        List<EREDTO> extracted = new ArrayList<>();
        for (TruncateDTO truncateDTO : truncateDTOS) {
            EREDTO eredto = null;
            try {
                eredto = tripleConversionPipeline.doEre(truncateDTO);
            } catch (Exception e) {
                log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
            }
            if (null == eredto) {
                continue;
            }
            // Persist the extraction result for this truncation.
            this.saveERE(eredto, truncateDTO.getId());
            extracted.add(eredto);
        }
        return extracted;
    }

    /**
     * Saves one domain-metadata record per merged relation (if not already present).
     */
    private void saveDomainMetadata(List<EREDTO> mergedList) {
        log.info("开始保存领域元数据...");
        for (EREDTO eredto : mergedList) {
            List<RelationExtractionDTO> relations = eredto.getRelations();
            if (CollUtil.isEmpty(relations)) {
                continue;
            }
            for (RelationExtractionDTO relation : relations) {
                DomainMetadata domainMetadata = relation.toDomainMetadata();
                // NOTE(review): meaning of domain type "1" is not visible in this file — confirm.
                domainMetadata.setDomainType("1");
                domainMetadata.setGenerationType(DomainMetaGenerationEnum.SYSTEM_AUTO_GENERATION.getCode());
                domainMetadataService.saveIfNotExists(domainMetadata);
            }
        }
        log.info("保存领域元数据完成....");
    }

    /**
     * Ensures every entity name, relation name and attribute name in the merged results has a
     * dictionary entry, and returns the dictionary (existing entries plus the ones just added).
     */
    private List<ChineseEnglishWords> saveDictionary(List<EREDTO> mergedList) {
        log.info("开始保存字典...");
        List<ChineseEnglishWords> allWords = chineseEnglishWordsService.queryAll();
        int wordsSize = allWords.size();
        for (EREDTO eredto : mergedList) {
            List<EntityExtractionDTO> entities = eredto.getEntities();
            if (CollUtil.isNotEmpty(entities)) {
                for (EntityExtractionDTO entityDTO : entities) {
                    saveWordsIfNecessary(entityDTO.getEntity(), allWords);
                    saveAttributeWords(entityDTO.getAttributes(), allWords);
                }
            }
            List<RelationExtractionDTO> relations = eredto.getRelations();
            if (CollUtil.isNotEmpty(relations)) {
                for (RelationExtractionDTO relationDTO : relations) {
                    saveWordsIfNecessary(relationDTO.getRelation(), allWords);
                    saveAttributeWords(relationDTO.getAttributes(), allWords);
                }
            }
        }
        log.info("保存字典完成,新增字典个数:{}", allWords.size() - wordsSize);
        return allWords;
    }

    /**
     * Adds a dictionary entry for each attribute name in {@code attributes}; no-op when empty.
     */
    private void saveAttributeWords(List<ERAttributeDTO> attributes, List<ChineseEnglishWords> allWords) {
        if (CollUtil.isEmpty(attributes)) {
            return;
        }
        for (ERAttributeDTO attribute : attributes) {
            saveWordsIfNecessary(attribute.getAttribute(), allWords);
        }
    }

    /**
     * Passes every non-empty merged result to the Cypher executor after attaching the dictionary.
     */
    private void writeGraph(List<EREDTO> mergedList, List<ChineseEnglishWords> allWords) {
        for (EREDTO eredto : mergedList) {
            if (CollUtil.isEmpty(eredto.getEntities()) && CollUtil.isEmpty(eredto.getRelations())) {
                continue;
            }
            eredto.setEn(allWords);
            tripleToCypherExecutor.saveERE(eredto);
        }
    }

    /**
     * Deletes the truncations of a document together with the entity and relation extraction
     * rows that belong to each truncation. Does nothing when the document has no truncations.
     *
     * @param documentId the document whose stored data is removed
     */
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void resetGraphData(String documentId) {
        log.info("resetGraphData:重置知识图谱数据,documentId:{}", documentId);
        List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentId(documentId);
        if (CollUtil.isEmpty(documentTruncations)) {
            log.info("没有找到文档切分数据,documentId:{},不用重置数据...", documentId);
            return;
        }
        // Delete the truncation rows.
        documentTruncationService.deleteByDocumentId(documentId);

        for (DocumentTruncation documentTruncation : documentTruncations) {
            String truncationId = documentTruncation.getId();
            // Delete entity rows of this truncation.
            truncationEntityExtractionService.deleteByTruncationId(truncationId);
            // Delete relation rows of this truncation.
            relationExtractionService.deleteByTruncationId(truncationId);
        }
        log.info("重置知识图谱数据完成,documentId:{}", documentId);
    }

    /**
     * Submits graph generation for {@code documentId} to the knowledge-graph thread pool and
     * records start / complete / fail status through {@link PdfInfoService}.
     *
     * @param documentId the document to process
     */
    @Override
    public void submitGenerateTask(String documentId) {
        log.info("submitGenerateTask:提交知识图谱生成任务,documentId:{}", documentId);
        // AopContext is thread-bound: capture the proxy here, on the submitting thread, and invoke
        // generateGraph through it inside the task. Calling generateGraph on `this` from the
        // worker thread would leave AopContext.currentProxy() without a proxy to return.
        KnowledgeGraphService self = currentProxyOrThis();
        KnowledgeGraphGenerateTreadPool.executorService.execute(() -> {
            try {
                pdfInfoService.pdfToGraphStart(documentId);
                self.generateGraph(documentId);
                pdfInfoService.pdfToGraphComplete(documentId);
            } catch (Exception e) {
                log.error("生成知识图谱失败,documentId:{}", documentId, e);
                pdfInfoService.pdfToGraphFail(documentId);
            }
        });
    }

    /**
     * Returns the AOP proxy bound to the current thread, or {@code this} when none is available
     * (for example when the caller did not come through the proxy or proxy exposure is disabled).
     */
    private KnowledgeGraphService currentProxyOrThis() {
        try {
            return (KnowledgeGraphService) AopContext.currentProxy();
        } catch (IllegalStateException e) {
            log.warn("No AOP proxy bound to the current thread, falling back to direct invocation", e);
            return this;
        }
    }

    /**
     * Generates and stores an English name for {@code word} unless the dictionary already has it.
     * Newly stored entries are appended to {@code allWords}, which acts as a local cache.
     *
     * @param word     the Chinese term; blank values are ignored
     * @param allWords the mutable in-memory dictionary
     */
    private void saveWordsIfNecessary(String word, List<ChineseEnglishWords> allWords) {
        if (StrUtil.isBlank(word)) {
            // Nothing to translate; avoid invoking the generator with an empty term.
            return;
        }
        boolean exists = chineseEnglishWordsService.wordsExists(word, allWords);
        if (exists) {
            return;
        }
        String generate = chinesEsToEnglishGenerator.generate(word);
        if (StrUtil.isEmpty(generate)) {
            log.warn("生成英文名称失败,entity:{}", word);
            return;
        }
        ChineseEnglishWords words = new ChineseEnglishWords();
        words.setChineseWord(word);
        words.setEnglishWord(generate);
        chineseEnglishWordsService.saveIfNotExists(words);
        // Keep the in-memory cache in sync with what was just stored.
        allWords.add(words);
    }

    /**
     * Not implemented; the method body is intentionally empty.
     */
    @Override
    public void queryGraph(String databaseId, String query) {

    }

    /**
     * Persists the entities and relations of one extraction result.
     *
     * @param eredto       the extraction result to store
     * @param truncationId id of the truncation the result came from; currently unused by this
     *                     implementation
     */
    @Override
    public void saveERE(EREDTO eredto, String truncationId) {
        // Save the entities.
        truncationEntityExtractionService.saveERE(eredto.getEntities());
        // Save the relations.
        relationExtractionService.saveERE(eredto.getRelations());
    }
}