package com.supervision.pdfqaserver.service.impl;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.NumberUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.supervision.pdfqaserver.constant.DocumentContentTypeEnum;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.constant.LayoutTypeEnum;
import com.supervision.pdfqaserver.domain.*;
import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.service.*;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.aop.framework.AopContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Builds a knowledge graph from PDF analysis output.
 *
 * <p>Pipeline: PDF analysis results are sliced into truncations, entities and
 * relations are extracted (ERE) per truncation, extraction results are merged
 * and finally persisted to the graph store via {@link TripleToCypherExecutor}.
 * A separate "metadata training" flow extracts intents and domain metadata
 * that later constrain graph generation.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {

    private final TripleConversionPipeline tripleConversionPipeline;
    private final TripleToCypherExecutor tripleToCypherExecutor;
    private final ChineseEnglishWordsService chineseEnglishWordsService;
    private final DocumentTruncationService documentTruncationService;
    private final DomainMetadataService domainMetadataService;
    private final PdfAnalysisOutputService pdfAnalysisOutputService;
    private final TruncationEntityExtractionService truncationEntityExtractionService;
    private final TruncationRelationExtractionService truncationRelationExtractionService;
    private final TruncationErAttributeService truncationErAttributeService;
    // NOTE(review): same type as truncationRelationExtractionService above — Spring will
    // inject the same bean twice. Consider removing one field (kept here because the
    // Lombok-generated constructor signature is part of the class's interface).
    private final TruncationRelationExtractionService relationExtractionService;
    private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
    private final PdfInfoService pdfInfoService;
    private final IntentionService intentionService;
    private final DomainCategoryService domainCategoryService;

    /**
     * Full graph build for one PDF: reset old graph data, slice the documents,
     * run entity/relation extraction on every slice, then generate the graph.
     *
     * @param pdfId id of the PDF (numeric string)
     */
    @Override
    public void generateGraph(String pdfId) {
        // Call through the AOP proxy so resetGraphData's @Transactional advice applies.
        ((KnowledgeGraphService) AopContext.currentProxy()).resetGraphData(pdfId);
        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
            log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
            return;
        }
        List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).toList();
        // Slice the documents into truncations.
        TimeInterval timer = new TimeInterval();
        timer.start("sliceDocuments");
        log.info("开始切分文档,初始文档个数:{}", documentDTOList.size());
        List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
        log.info("切分文档完成,切分后文档个数:{},耗时:{}秒", truncateDTOS.size(), timer.intervalSecond("sliceDocuments"));
        // Persist the slices.
        documentTruncationService.batchSave(truncateDTOS);
        // Named-entity/relation extraction on every slice.
        timer.start("doEre");
        log.info("开始命名实体识别...");
        List<EREDTO> eredtoList = truncateERE(truncateDTOS);
        log.info("命名实体识别完成,耗时:{}秒", timer.intervalSecond("doEre"));
        generateGraph(eredtoList);
        // Default interval group: elapsed time since the timer was created (total).
        log.info("生成知识图谱完成,耗时:{}秒", timer.intervalSecond());
    }

    /**
     * Metadata training entry point: runs the executor and records the
     * train status (complete/fail) on the PdfInfo.
     *
     * @param pdfId pdfId
     */
    @Override
    public void metaDataTrain(Integer pdfId) {
        TimeInterval timer = new TimeInterval();
        try {
            metaDataTrainExecutor(pdfId);
            pdfInfoService.pdfTrainComplete(pdfId);
            log.info("pdfId:{}元数据训练完成,耗时:{}秒", pdfId, timer.intervalSecond());
        } catch (Exception e) {
            PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId);
            // NOTE(review): fail status is only recorded when trainStatus is null or 0 —
            // presumably to avoid clobbering a state set elsewhere; verify the status codes.
            if (null == pdfInfo.getTrainStatus() || pdfInfo.getTrainStatus() == 0) {
                log.error("pdfId:{}元数据训练失败...", pdfId, e);
                pdfInfoService.pdfTrainFail(pdfId);
            }
            log.info("pdfId:{}元数据训练失败,耗时:{}秒", pdfId, timer.intervalSecond());
        }
    }

    /**
     * Graph build that relies on previously trained (manually confirmed) intents.
     * Resets old graph data first and records the to-graph status transitions.
     *
     * @param pdfId id of the PDF, must not be null
     */
    @Override
    public void generateGraphBaseTrain(Integer pdfId) {
        Assert.notNull(pdfId, "pdfId不能为空");
        TimeInterval timer = new TimeInterval();
        try {
            log.info("开始生成知识图谱, pdfId:{}", pdfId);
            // Through the proxy so @Transactional on resetGraphData is honored.
            ((KnowledgeGraphService) AopContext.currentProxy()).resetGraphData(pdfId.toString());
            pdfInfoService.pdfToGraphStart(pdfId);
            generateGraphBaseTrainExecutor(pdfId);
            pdfInfoService.pdfToGraphComplete(pdfId);
            log.info("pdfId:{}知识图谱生成完成,总耗时:{}秒", pdfId, timer.intervalSecond());
        } catch (Exception e) {
            log.error("pdfId:{}知识图谱生成失败...", pdfId, e);
            pdfInfoService.pdfToGraphFail(pdfId);
            log.info("pdfId:{}知识图谱生成失败,总耗时:{}秒", pdfId, timer.intervalSecond());
        }
    }

    /**
     * Metadata training: ensures content type and domain category are known
     * (detecting them if absent), re-slices the PDF, then extracts intents and
     * domain metadata from every TEXT-type slice.
     *
     * @param pdfId id of the PDF, must not be null
     */
    private void metaDataTrainExecutor(Integer pdfId) {
        Assert.notNull(pdfId, "pdfId不能为空");
        PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId);
        Assert.notNull(pdfInfo, "pdfId:{}没有找到对应的pdf信息", pdfId);
        // Only (re-)detect content type / industry when training is unset (null) or in state 2.
        if (null == pdfInfo.getTrainStatus() || pdfInfo.getTrainStatus() == 2) {
            log.info("pdfId:{}没有找到对应的pdf训练状态,开始识别文档训练状态...", pdfId);
            pdfInfoService.pdfTrainStart(pdfId);
            if (StrUtil.isEmpty(pdfInfo.getContentType())) {
                log.info("pdfId:{}没有找到对应的pdf内容类型,开始识别文档内容类型...", pdfId);
                DocumentContentTypeEnum documentContentTypeEnum = tripleConversionPipeline.makeOutPdfContentType(pdfId);
                log.info("pdfId:{}识别文档内容类型完成,内容类型:{}", pdfId, documentContentTypeEnum.getType());
                if (StrUtil.isEmpty(documentContentTypeEnum.getType())) {
                    log.info("pdfId:{}没有找到对应的pdf内容类型,停止后续任务...", pdfId);
                    pdfInfoService.pdfTrainFail(pdfId);
                    return;
                }
                pdfInfo.setContentType(documentContentTypeEnum.getType());
                pdfInfoService.updateContentType(pdfId, documentContentTypeEnum.getType());
            }
            if (StrUtil.isEmpty(pdfInfo.getDomainCategoryId())) {
                log.info("pdfId:{}没有找到对应的pdf行业,开始识别文档行业...", pdfId);
                String industry = tripleConversionPipeline.makeOutPdfIndustry(pdfId);
                log.info("pdfId:{}识别文档行业完成,行业:{}", pdfId, industry);
                if (StrUtil.isEmpty(industry)) {
                    log.info("pdfId:{}没有找到对应的pdf行业,停止后续任务...", pdfId);
                    pdfInfoService.pdfTrainFail(pdfId);
                    return;
                }
                DomainCategory domainCategory = domainCategoryService.queryByIndustryName(industry);
                if (null == domainCategory) {
                    log.info("pdfId:{}没有找到:{}对应的行业分类,停止后续任务...", pdfId, industry);
                    pdfInfoService.pdfTrainFail(pdfId);
                    return;
                }
                pdfInfo.setDomainCategoryId(domainCategory.getId());
                pdfInfoService.updateCategory(pdfId, domainCategory.getId());
            }
        }
        // Pipeline selection by content type + domain category (shadows the field on purpose).
        TripleConversionPipeline tripleConversionPipeline = this.getTripleConversionPipeline(pdfInfo.getContentType(), pdfInfo.getDomainCategoryId());
        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(pdfId);
        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
            log.warn("没有找到pdfId为{}的pdf分析结果,不再进行下一步操作...", pdfId);
            return;
        }
        List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> String.valueOf(p.getId())).collect(Collectors.toList());
        List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
        if (CollUtil.isNotEmpty(documentTruncations)) {
            // Stale slices from a previous run are removed before re-slicing.
            log.info("文档切分数据不为空,pdfId:{},清除切分数据...", pdfId);
            documentTruncationService.deleteByDocumentIds(documentIds);
        }
        log.info("开始切割文档切片,pdfId:{}", pdfId);
        List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).collect(Collectors.toList());
        List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
        log.info("切割文档切片完成,切片个数:{}", truncateDTOS.size());
        // Persist the slices.
        documentTruncationService.batchSave(truncateDTOS);
        // Only TEXT-type slices take part in intent/metadata extraction.
        truncateDTOS = truncateDTOS.stream()
                .filter(t -> StrUtil.equals(t.getLayoutType(), String.valueOf(LayoutTypeEnum.TEXT.getCode())))
                .collect(Collectors.toList());
        log.info("只识别文本类型数据,个数:{}", truncateDTOS.size());
        int truncateSize = truncateDTOS.size();
        int index = 1;
        // NOTE(review): intentSize counts slices that produced intents, not individual
        // intents — the final log message ("抽取出意图数量") slightly overstates it.
        int intentSize = 0;
        TimeInterval interval = new TimeInterval();
        for (TruncateDTO truncateDTO : truncateDTOS) {
            try {
                log.info("正在意图、元数据抽取,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((index * 1.0) / truncateSize, 2));
                // FIX: increment exactly once per iteration; previously the empty-intents
                // continue skipped the increment and the logged progress stalled.
                index++;
                log.info("开始意图识别,切分文档id:{}", truncateDTO.getId());
                interval.start("makeOutTruncationIntent");
                List<IntentDTO> intents = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO);
                log.info("意图识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(), interval.intervalMs("makeOutTruncationIntent"));
                if (CollUtil.isEmpty(intents)) {
                    log.info("切分文档id:{},未正确识别出意图...", truncateDTO.getId());
                    continue;
                }
                log.info("开始意图元数据识别,切分文档id:{}", truncateDTO.getId());
                interval.start("makeOutDomainMetadata");
                List<DomainMetadataDTO> domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intents);
                log.info("意图元数据识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(), interval.intervalMs("makeOutDomainMetadata"));
                // Persist intents, then attach the metadata belonging to each intent digest.
                intentSize++;
                List<Intention> intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString());
                for (Intention intention : intentions) {
                    List<DomainMetadataDTO> metadataDTOS = domainMetadataDTOS.stream()
                            .filter(d -> StrUtil.equals(d.getIntentDigest(), intention.getDigest())).toList();
                    domainMetadataService.batchSaveOrUpdateMetadata(metadataDTOS, intention.getId(), pdfInfo.getDomainCategoryId());
                }
            } catch (Exception e) {
                // Best-effort per slice: one failure must not abort the whole training run.
                log.error("切分文档id:{},意图识别失败", truncateDTO.getId(), e);
            }
        }
        log.info("意图、元数据抽取完成,耗时:{}秒,一共处理片段数:{}个,抽取出意图数量:{}个", interval.intervalSecond(), truncateSize, intentSize);
    }

    /**
     * Graph build constrained by manually confirmed intents of the PDF's domain
     * category. Table slices go straight to ERE; text slices are first matched
     * against the known intents.
     *
     * @param pdfId id of the PDF, must not be null
     */
    private void generateGraphBaseTrainExecutor(Integer pdfId) {
        Assert.notNull(pdfId, "pdfId不能为空");
        PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId);
        Assert.notNull(pdfInfo, "pdfId:{}没有找到对应的pdf信息", pdfId);
        if (StrUtil.isEmpty(pdfInfo.getContentType())) {
            log.info("pdfId:{}没有找到对应的pdf内容类型,开始识别文档内容类型...", pdfId);
            DocumentContentTypeEnum documentContentTypeEnum = tripleConversionPipeline.makeOutPdfContentType(pdfId);
            if (null == documentContentTypeEnum) {
                log.info("pdfId:{}没有找到对应的pdf内容类型,停止后续任务...", pdfId);
                return;
            }
            pdfInfo.setContentType(documentContentTypeEnum.getType());
            pdfInfoService.updateContentType(pdfId, documentContentTypeEnum.getType());
        }
        // NOTE(review): metaDataTrainExecutor checks StrUtil.isEmpty here; the null-only
        // check below misses empty strings — confirm which is intended.
        if (null == pdfInfo.getDomainCategoryId()) {
            log.info("pdfId:{}没有找到对应的pdf行业,开始识别文档行业...", pdfId);
            String industry = tripleConversionPipeline.makeOutPdfIndustry(pdfId);
            if (StrUtil.isEmpty(industry)) {
                log.info("pdfId:{}没有找到对应的pdf行业,停止后续任务...", pdfId);
                return;
            }
            DomainCategory domainCategory = domainCategoryService.queryByIndustryName(industry);
            if (null == domainCategory) {
                log.info("pdfId:{}没有找到:{}对应的行业分类,停止后续任务...", pdfId, industry);
                return;
            }
            pdfInfo.setDomainCategoryId(domainCategory.getId());
            pdfInfoService.updateCategory(pdfId, domainCategory.getId());
        }
        List<TruncateDTO> truncateDTOS = documentTruncationService.listByPdfId(pdfId).stream()
                .map(TruncateDTO::new).collect(Collectors.toList());
        TripleConversionPipeline conversionPipeline = this.getTripleConversionPipeline(pdfInfo.getContentType(), pdfInfo.getDomainCategoryId());
        if (CollUtil.isEmpty(truncateDTOS)) {
            // No slices yet — slice now and persist.
            log.info("没有找到pdfId为{}的文档切分数据,开始切分数据...", pdfId);
            List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(pdfId);
            List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).collect(Collectors.toList());
            truncateDTOS = conversionPipeline.sliceDocuments(documentDTOList);
            documentTruncationService.batchSave(truncateDTOS);
            log.info("切分数据完成,切分个数:{}", truncateDTOS.size());
        }
        // Intents of this domain category; generationType "0" = manually confirmed.
        List<IntentDTO> intentionDTOs = intentionService.queryByDomainCategoryId(pdfInfo.getDomainCategoryId()).stream()
                .filter(intention -> StrUtil.equals("0", intention.getGenerationType()))
                .map(IntentDTO::new).distinct().toList();
        if (CollUtil.isEmpty(intentionDTOs)) {
            log.info("没有找到行业分类id为{}的意图数据,不再进行下一步操作...", pdfInfo.getDomainCategoryId());
            return;
        }
        TimeInterval timer = new TimeInterval();
        int index = 1;
        int truncateSize = truncateDTOS.size();
        log.info("开始实体关系抽取,耗时:{}秒,一共处理片段数:{}个", timer.intervalSecond(), truncateDTOS.size());
        List<EREDTO> eredtos = new ArrayList<>();
        for (TruncateDTO truncateDTO : truncateDTOS) {
            index++;
            log.info("开始命名实体识别,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((index * 1.0) / truncateSize, 2));
            try {
                if (StrUtil.equals(truncateDTO.getLayoutType(), String.valueOf(LayoutTypeEnum.TABLE.getCode()))) {
                    // Table slices skip intent recognition and go straight to ERE.
                    log.info("切分文档id:{},表格类型数据,不进行意图识别...", truncateDTO.getId());
                    EREDTO eredto = conversionPipeline.doEre(truncateDTO, new ArrayList<>());
                    if (null == eredto) {
                        log.info("切分文档id:{},命名实体识别结果为空...", truncateDTO.getId());
                        continue;
                    }
                    this.saveERE(eredto, truncateDTO.getId());
                    eredtos.add(eredto);
                    // FIX: table slices must not fall through into the intent-recognition
                    // path below — without this continue they were extracted and saved twice.
                    continue;
                }
                timer.start("makeOutTruncationIntent");
                log.info("开始意图识别,切分文档id:{}", truncateDTO.getId());
                List<IntentDTO> intents = conversionPipeline.makeOutTruncationIntent(truncateDTO, intentionDTOs);
                log.info("意图识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(), timer.intervalMs("makeOutTruncationIntent"));
                if (CollUtil.isEmpty(intents)) {
                    log.info("切分文档id:{},未正确识别出意图...", truncateDTO.getId());
                    continue;
                }
                log.info("开始命名实体识别,切分文档id:{}", truncateDTO.getId());
                timer.start("doEre");
                EREDTO eredto = conversionPipeline.doEre(truncateDTO, intents);
                log.info("命名实体识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(), timer.intervalMs("doEre"));
                if (null == eredto) {
                    log.info("切分文档id:{},命名实体识别结果为空...", truncateDTO.getId());
                    continue;
                }
                // Persist extraction results for this slice.
                this.saveERE(eredto, truncateDTO.getId());
                eredtos.add(eredto);
            } catch (Exception e) {
                // Best-effort per slice; keep processing the remaining slices.
                log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
            }
        }
        log.info("实体关系抽取完成,耗时:{}秒", timer.intervalSecond());
        log.info("开始生成知识图谱...");
        timer.start("generateGraph");
        generateGraphSimple(eredtos);
        log.info("生成知识图谱完成,耗时:{}秒", timer.intervalSecond("generateGraph"));
    }

    /**
     * Resolves the pipeline implementation for a content type and industry.
     * Content type drives slicing; industry drives intent matching. Currently
     * only the default (pdf) pipeline exists, so the injected bean is returned.
     *
     * @param contentType detected document content type
     * @param industry    domain category id of the document's industry
     * @return the pipeline to use (never null)
     */
    @Override
    public TripleConversionPipeline getTripleConversionPipeline(String contentType, String industry) {
        return this.tripleConversionPipeline;
    }

    /**
     * Merges ERE results, persists derived domain metadata and the
     * Chinese-English dictionary, then emits cypher for every merged result.
     *
     * @param eredtoList raw per-slice extraction results
     */
    @Override
    public void generateGraph(List<EREDTO> eredtoList) {
        log.info("开始合并实体关系抽取结果...");
        List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
        log.info("合并实体关系抽取结果完成,合并后个数:{}", mergedList.size());
        // Persist domain metadata derived from every relation.
        log.info("开始保存领域元数据...");
        for (EREDTO eredto : mergedList) {
            List<RelationExtractionDTO> relations = eredto.getRelations();
            if (CollUtil.isEmpty(relations)) {
                continue;
            }
            for (RelationExtractionDTO relation : relations) {
                DomainMetadata domainMetadata = relation.toDomainMetadata();
                domainMetadata.setGenerationType(DomainMetaGenerationEnum.SYSTEM_AUTO_GENERATION.getCode());
                domainMetadataService.saveIfNotExists(domainMetadata);
            }
        }
        log.info("保存领域元数据完成....");
        // Persist the Chinese-English dictionary for every entity/relation/attribute name.
        log.info("开始保存字典...");
        List<ChineseEnglishWords> allWords = chineseEnglishWordsService.queryAll();
        int wordsSize = allWords.size();
        for (EREDTO eredto : mergedList) {
            List<EntityExtractionDTO> entities = eredto.getEntities();
            if (CollUtil.isNotEmpty(entities)) {
                for (EntityExtractionDTO entityDTO : entities) {
                    saveWordsIfNecessary(entityDTO.getEntity(), allWords);
                    if (CollUtil.isNotEmpty(entityDTO.getAttributes())) {
                        for (TruncationERAttributeDTO attribute : entityDTO.getAttributes()) {
                            saveWordsIfNecessary(attribute.getAttribute(), allWords);
                        }
                    }
                }
            }
            List<RelationExtractionDTO> relations = eredto.getRelations();
            if (CollUtil.isNotEmpty(relations)) {
                for (RelationExtractionDTO relationDTO : relations) {
                    saveWordsIfNecessary(relationDTO.getRelation(), allWords);
                    if (CollUtil.isNotEmpty(relationDTO.getAttributes())) {
                        for (TruncationERAttributeDTO attribute : relationDTO.getAttributes()) {
                            saveWordsIfNecessary(attribute.getAttribute(), allWords);
                        }
                    }
                }
            }
        }
        log.info("保存字典完成,新增字典个数:{}", allWords.size() - wordsSize);
        // Emit cypher per merged result.
        for (EREDTO eredto : mergedList) {
            if (CollUtil.isEmpty(eredto.getEntities()) && CollUtil.isEmpty(eredto.getRelations())) {
                continue;
            }
            // Build a per-result dictionary (identity mapping — see getChineseEnglishWords).
            allWords = getChineseEnglishWords(eredto);
            eredto.setEn(allWords);
            try {
                tripleToCypherExecutor.saveERE(eredto);
            } catch (Exception e) {
                // FIX: failure was logged at info level; error keeps the stack trace visible.
                log.error("生成cypher语句失败,切分文档id:{}", JSONUtil.toJsonStr(eredto), e);
            }
        }
    }

    /**
     * Like {@link #generateGraph(List)} but skips metadata/dictionary
     * persistence: merge the results and emit cypher only.
     *
     * @param eredtoList raw per-slice extraction results
     */
    @Override
    public void generateGraphSimple(List<EREDTO> eredtoList) {
        log.info("开始合并实体关系抽取结果...");
        List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
        log.info("合并实体关系抽取结果完成,合并后个数:{}", mergedList.size());
        for (EREDTO eredto : mergedList) {
            if (CollUtil.isEmpty(eredto.getEntities()) && CollUtil.isEmpty(eredto.getRelations())) {
                continue;
            }
            try {
                tripleToCypherExecutor.saveERE(eredto);
            } catch (Exception e) {
                // FIX: failure was logged at info level; error keeps the stack trace visible.
                log.error("生成cypher语句失败,切分文档id:{}", JSONUtil.toJsonStr(eredto), e);
            }
        }
    }

    /**
     * Builds a word list from one ERE result covering entity names, relation
     * names, source/target types and all attribute names.
     *
     * <p>NOTE(review): the "English" word is set to the Chinese word itself —
     * presumably an identity placeholder (translation happens elsewhere via
     * {@code chinesEsToEnglishGenerator}); confirm this is intended.
     *
     * @param eredto one merged extraction result
     * @return mutable list of word pairs (chinese == english)
     */
    private static List<ChineseEnglishWords> getChineseEnglishWords(EREDTO eredto) {
        List<ChineseEnglishWords> allWords;
        allWords = eredto.getEntities().stream().flatMap(entity -> {
            List<ChineseEnglishWords> collect = entity.getAttributes().stream().map(e -> {
                ChineseEnglishWords words = new ChineseEnglishWords();
                words.setChineseWord(e.getAttribute());
                words.setEnglishWord(e.getAttribute());
                return words;
            }).collect(Collectors.toList());
            ChineseEnglishWords words = new ChineseEnglishWords();
            words.setChineseWord(entity.getEntity());
            words.setEnglishWord(entity.getEntity());
            collect.add(words);
            return collect.stream();
        }).collect(Collectors.toList());
        eredto.getRelations().stream().flatMap(relation -> {
            List<ChineseEnglishWords> words = relation.getAttributes().stream().map(e -> {
                ChineseEnglishWords word = new ChineseEnglishWords();
                word.setChineseWord(e.getAttribute());
                word.setEnglishWord(e.getAttribute());
                return word;
            }).collect(Collectors.toList());
            ChineseEnglishWords words1 = new ChineseEnglishWords();
            words1.setChineseWord(relation.getRelation());
            words1.setEnglishWord(relation.getRelation());
            words.add(words1);
            ChineseEnglishWords words2 = new ChineseEnglishWords();
            words2.setChineseWord(relation.getSourceType());
            words2.setEnglishWord(relation.getSourceType());
            words.add(words2);
            ChineseEnglishWords words3 = new ChineseEnglishWords();
            words3.setChineseWord(relation.getTargetType());
            words3.setEnglishWord(relation.getTargetType());
            words.add(words3);
            return words.stream();
        }).forEach(allWords::add);
        return allWords;
    }

    /**
     * Runs entity/relation extraction on every slice, persisting each
     * non-null result. Per-slice failures are logged and skipped.
     *
     * @param truncateDTOS slices to process
     * @return extraction results that succeeded (never null)
     */
    @Override
    public List<EREDTO> truncateERE(List<TruncateDTO> truncateDTOS) {
        List<EREDTO> eredtoList = new ArrayList<>();
        int truncateSize = truncateDTOS.size();
        int index = 1;
        for (TruncateDTO truncateDTO : truncateDTOS) {
            log.info("开始命名实体识别,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((index * 1.0) / truncateSize, 2));
            index++;
            EREDTO eredto = null;
            try {
                eredto = tripleConversionPipeline.doEre(truncateDTO);
            } catch (Exception e) {
                log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
            }
            if (null == eredto) {
                continue;
            }
            // Persist extraction results for this slice.
            this.saveERE(eredto, truncateDTO.getId());
            eredtoList.add(eredto);
        }
        return eredtoList;
    }

    /**
     * Deletes previously extracted entities and relations for every slice of
     * the PDF. The slices themselves are kept (deletion is intentionally
     * commented out — they are reused by later runs).
     *
     * @param pdfId id of the PDF (numeric string)
     */
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void resetGraphData(String pdfId) {
        log.info("resetGraphData:重置知识图谱数据,pdfId:{}", pdfId);
        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
            log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
            return;
        }
        List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> String.valueOf(p.getId())).toList();
        List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
        if (CollUtil.isEmpty(documentTruncations)) {
            log.info("没有找到文档切分数据,pdfId:{},不用重置数据...", pdfId);
            return;
        }
        // Slice deletion deliberately disabled — slices are reused across runs.
        //documentTruncationService.deleteByDocumentIds(documentIds);
        for (DocumentTruncation documentTruncation : documentTruncations) {
            String truncationId = documentTruncation.getId();
            // Remove extracted entities for this slice.
            truncationEntityExtractionService.deleteByTruncationId(truncationId);
            // Remove extracted relations for this slice.
            relationExtractionService.deleteByTruncationId(truncationId);
        }
        log.info("重置知识图谱数据完成,pdfId:{}", pdfId);
    }

    /**
     * Ensures a Chinese word has a dictionary entry: generates an English
     * translation and persists it unless it already exists. The in-memory
     * cache list is updated on success.
     *
     * @param word     Chinese word to translate
     * @param allWords in-memory cache of existing entries (mutated on add)
     */
    private void saveWordsIfNecessary(String word, List<ChineseEnglishWords> allWords) {
        boolean exists = chineseEnglishWordsService.wordsExists(word, allWords);
        if (exists) {
            return;
        }
        String generate = chinesEsToEnglishGenerator.generate(word);
        if (StrUtil.isEmpty(generate)) {
            log.warn("生成英文名称失败,entity:{}", word);
            return;
        }
        ChineseEnglishWords words = new ChineseEnglishWords();
        words.setChineseWord(word);
        words.setEnglishWord(generate);
        chineseEnglishWordsService.saveIfNotExists(words);
        allWords.add(words); // keep the cache in sync
    }

    /**
     * Not yet implemented.
     *
     * @param databaseId target graph database id
     * @param query      query to run
     */
    @Override
    public void queryGraph(String databaseId, String query) {
    }

    /**
     * Persists one extraction result: entities then relations.
     *
     * <p>NOTE(review): the truncationId parameter is unused — presumably the
     * DTOs already carry their truncation id; confirm and consider removing it
     * from the interface.
     *
     * @param eredto       extraction result to store
     * @param truncationId id of the slice the result came from (currently unused)
     */
    @Override
    public void saveERE(EREDTO eredto, String truncationId) {
        // Persist entities.
        truncationEntityExtractionService.saveERE(eredto.getEntities());
        // Persist relations.
        relationExtractionService.saveERE(eredto.getRelations());
    }

    /**
     * Loads all persisted extraction results for a PDF, re-attaching each
     * entity's/relation's attributes. Each entity and each relation is wrapped
     * in its own EREDTO.
     *
     * @param pdfId id of the PDF (numeric string)
     * @return one EREDTO per stored entity/relation; empty list when the PDF
     *         has no analysis output
     */
    @Override
    public List<EREDTO> listPdfEREDTO(String pdfId) {
        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
            log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
            return new ArrayList<>();
        }
        List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> p.getId().toString()).toList();
        List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
        List<String> truncationIds = documentTruncations.stream().map(DocumentTruncation::getId).toList();
        List<TruncationEntityExtraction> truncationEntityExtractions = truncationEntityExtractionService.queryByTruncationIds(truncationIds);
        List<TruncationRelationExtraction> truncationRelationExtractions = truncationRelationExtractionService.queryByTruncationIds(truncationIds);
        // Attributes are keyed by terId, shared across entity and relation ids.
        List<String> teIds = truncationEntityExtractions.stream().map(TruncationEntityExtraction::getId).toList();
        List<String> trIds = truncationRelationExtractions.stream().map(TruncationRelationExtraction::getId).collect(Collectors.toList());
        trIds.addAll(teIds);
        List<TruncationErAttribute> truncationErAttributes = truncationErAttributeService.queryByTerIds(trIds);
        List<EREDTO> eres = new ArrayList<>();
        for (TruncationEntityExtraction entityExtraction : truncationEntityExtractions) {
            EREDTO eredto = new EREDTO();
            EntityExtractionDTO extractionDTO = new EntityExtractionDTO(entityExtraction);
            List<TruncationERAttributeDTO> attributes = truncationErAttributes.stream()
                    .filter(t -> StrUtil.equals(entityExtraction.getId(), t.getTerId()))
                    .map(TruncationERAttributeDTO::new).collect(Collectors.toList());
            extractionDTO.setAttributes(attributes);
            eredto.getEntities().add(extractionDTO);
            eres.add(eredto);
        }
        for (TruncationRelationExtraction relationExtraction : truncationRelationExtractions) {
            EREDTO eredto = new EREDTO();
            RelationExtractionDTO extractionDTO = new RelationExtractionDTO(relationExtraction);
            List<TruncationERAttributeDTO> attributes = truncationErAttributes.stream()
                    .filter(t -> StrUtil.equals(relationExtraction.getId(), t.getTerId()))
                    .map(TruncationERAttributeDTO::new).collect(Collectors.toList());
            extractionDTO.setAttributes(attributes);
            eredto.getRelations().add(extractionDTO);
            eres.add(eredto);
        }
        return eres;
    }
}