generateGraph功能优化

master
xueqingkun 1 month ago
parent d0f1147f38
commit 01566bba64

@ -19,6 +19,8 @@ public class PromptCache {
public static final String CLASSIFY_TABLE = "CLASSIFY_TABLE"; public static final String CLASSIFY_TABLE = "CLASSIFY_TABLE";
public static final String EXTRACT_TABLE_TITLE = "EXTRACT_TABLE_TITLE";
public static final Map<String, String> promptMap = new HashMap<>(); public static final Map<String, String> promptMap = new HashMap<>();
static { static {
@ -32,6 +34,7 @@ public class PromptCache {
promptMap.put(TEXT_TO_CYPHER, TEXT_TO_CYPHER_PROMPT); promptMap.put(TEXT_TO_CYPHER, TEXT_TO_CYPHER_PROMPT);
promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT); promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT);
promptMap.put(CLASSIFY_TABLE, CLASSIFY_TABLE_PROMPT); promptMap.put(CLASSIFY_TABLE, CLASSIFY_TABLE_PROMPT);
promptMap.put(EXTRACT_TABLE_TITLE, EXTRACT_TABLE_TITLE_PROMPT);
} }
@ -152,7 +155,8 @@ public class PromptCache {
1. 1.
2. 2.
3. 3.
4. 4.
5.
**** ****
{ {
@ -361,4 +365,18 @@ public class PromptCache {
{} {}
"""; """;
private static final String EXTRACT_TABLE_TITLE_PROMPT = """
****
-
****
-
****
-
****
{}
""";
} }

@ -30,11 +30,11 @@ public class OllamaChatModelAspect {
// 获取原始参数 // 获取原始参数
Object[] args = joinPoint.getArgs(); Object[] args = joinPoint.getArgs();
// 如果是String类型的call方法修改其参数 // 如果是String类型的call方法修改其参数
if (StrUtil.equals(signature, callStringMessage) && args.length > 0 && args[0] instanceof String originalPrompt) { if (StrUtil.equals(signature, callStringMessage) && args.length > 0) {
args[0] = originalPrompt + "/no_think"; args[0] = args[0] + "\n /no_think";
} }
// 执行原方法 // 执行原方法
Object result = joinPoint.proceed(); Object result = joinPoint.proceed(args);
if (StrUtil.equals(model,"qwen3:30b-a3b") ) { if (StrUtil.equals(model,"qwen3:30b-a3b") ) {
if(StrUtil.equals(signature, callStringMessage)){ if(StrUtil.equals(signature, callStringMessage)){
result = ((String) result).replaceAll("(?is)<think\\b[^>]*>(.*?)</think>", "").trim(); result = ((String) result).replaceAll("(?is)<think\\b[^>]*>(.*?)</think>", "").trim();

@ -41,6 +41,15 @@ public class ERAttributeDTO {
public ERAttributeDTO() { public ERAttributeDTO() {
} }
public ERAttributeDTO(TruncationErAttribute truncationErAttribute) {
this.id = truncationErAttribute.getId();
this.terId = truncationErAttribute.getTerId();
this.associationType = truncationErAttribute.getAssociationType();
this.attribute = truncationErAttribute.getAttribute();
this.value = truncationErAttribute.getValue();
this.dataType = truncationErAttribute.getDataType();
}
public ERAttributeDTO(String attribute, String value, String dataType) { public ERAttributeDTO(String attribute, String value, String dataType) {
this.attribute = attribute; this.attribute = attribute;
this.value = value; this.value = value;

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.dto;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.UUID; import cn.hutool.core.lang.UUID;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
@ -36,6 +37,11 @@ public class EREDTO {
JSONObject nodeJson = (JSONObject) node; JSONObject nodeJson = (JSONObject) node;
String name = nodeJson.getString("name"); String name = nodeJson.getString("name");
String type = nodeJson.getString("type"); String type = nodeJson.getString("type");
if (StrUtil.hasBlank(name,type)){
continue;
}
name = StrUtil.trim(name);
type = StrUtil.trim(type);
JSONObject attributes = nodeJson.getJSONObject("attributes"); JSONObject attributes = nodeJson.getJSONObject("attributes");
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>(); List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
if (CollUtil.isNotEmpty(attributes)){ if (CollUtil.isNotEmpty(attributes)){
@ -60,22 +66,33 @@ public class EREDTO {
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>(); List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
if (CollUtil.isNotEmpty(attributes)){ if (CollUtil.isNotEmpty(attributes)){
for (String key : attributes.keySet()) { for (String key : attributes.keySet()) {
if (StrUtil.isBlank(key)){
continue;
}
Object value = attributes.get(key); Object value = attributes.get(key);
if (value instanceof String){
if (StrUtil.isBlank((String) value)){
continue;
}
value = StrUtil.trim((String) value);
}
String valueString = attributes.getString(key); String valueString = attributes.getString(key);
ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0");
erAttributeDTOS.add(erAttributeDTO); erAttributeDTOS.add(erAttributeDTO);
} }
} }
if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){ if (StrUtil.isBlank(source) || StrUtil.isBlank(target)){
log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson); log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson);
continue; continue;
} }
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), source)).findFirst(); final String sourceTrim = StrUtil.trim(source);
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), sourceTrim)).findFirst();
if (sourceTypeOpt.isEmpty()){ if (sourceTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson); log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson);
continue; continue;
} }
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), target)).findFirst(); final String targetTrim = StrUtil.trim(target);
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), targetTrim)).findFirst();
if (targetTypeOpt.isEmpty()){ if (targetTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson); log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson);
continue; continue;
@ -108,12 +125,22 @@ public class EREDTO {
EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO(); EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO();
entityExtractionDTO.setEntity("行"); entityExtractionDTO.setEntity("行");
// 避免表格行名重复 // 避免表格行名重复
entityExtractionDTO.setName("行-" + UUID.randomUUID()); entityExtractionDTO.setName("行-" + RandomUtil.randomString(UUID.randomUUID().toString(), 10));
entityExtractionDTO.setTruncationId(truncationId); entityExtractionDTO.setTruncationId(truncationId);
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>(); List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) { for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) {
String key = tableEntry.getKey(); String key = tableEntry.getKey();
if (StrUtil.isBlank(key)){
continue;
}
key = StrUtil.trim(key);
Object value = tableEntry.getValue(); Object value = tableEntry.getValue();
if (value instanceof String){
if (StrUtil.isBlank(value.toString())){
continue;
}
value = StrUtil.trim((String) value);
}
ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, value.toString(), value instanceof Number ? "1" : "0"); ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, value.toString(), value instanceof Number ? "1" : "0");
erAttributeDTOS.add(erAttributeDTO); erAttributeDTOS.add(erAttributeDTO);
} }

@ -1,5 +1,6 @@
package com.supervision.pdfqaserver.dto; package com.supervision.pdfqaserver.dto;
import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
import lombok.Data; import lombok.Data;
import java.util.ArrayList; import java.util.ArrayList;
@ -35,6 +36,13 @@ public class EntityExtractionDTO {
public EntityExtractionDTO() { public EntityExtractionDTO() {
} }
public EntityExtractionDTO(TruncationEntityExtraction entityExtraction) {
this.id = entityExtraction.getId();
this.truncationId = entityExtraction.getTruncationId();
this.entity = entityExtraction.getEntity();
this.name = entityExtraction.getName();
}
public EntityExtractionDTO(String truncationId, String entity, String name, List<ERAttributeDTO> attributes) { public EntityExtractionDTO(String truncationId, String entity, String name, List<ERAttributeDTO> attributes) {
this.truncationId = truncationId; this.truncationId = truncationId;
this.entity = entity; this.entity = entity;

@ -51,7 +51,17 @@ public class RelationExtractionDTO {
public RelationExtractionDTO() { public RelationExtractionDTO() {
} }
public RelationExtractionDTO(String truncationId,String source, String sourceType,String relation, String target,String targetType, List<ERAttributeDTO> attributes) { public RelationExtractionDTO(TruncationRelationExtraction relationExtraction) {
this.id = relationExtraction.getId();
this.truncationId = relationExtraction.getTruncationId();
this.source = relationExtraction.getSource();
this.sourceType = relationExtraction.getSourceType();
this.relation = relationExtraction.getRelation();
this.target = relationExtraction.getTarget();
this.targetType = relationExtraction.getTargetType();
}
public RelationExtractionDTO(String truncationId, String source, String sourceType, String relation, String target, String targetType, List<ERAttributeDTO> attributes) {
this.truncationId = truncationId; this.truncationId = truncationId;
this.source = source; this.source = source;
this.relation = relation; this.relation = relation;

@ -18,5 +18,11 @@ public interface DocumentTruncationService extends IService<DocumentTruncation>
void deleteByDocumentId(String documentId); void deleteByDocumentId(String documentId);
void deleteByDocumentIds(List<String> documentIds);
List<DocumentTruncation> queryByDocumentId(String documentId); List<DocumentTruncation> queryByDocumentId(String documentId);
List<DocumentTruncation> queryByDocumentIds(List<String> documentIds);
List<DocumentTruncation> queryNotERETruncate(List<String> documentIds);
} }

@ -1,6 +1,8 @@
package com.supervision.pdfqaserver.service; package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.dto.EREDTO; import com.supervision.pdfqaserver.dto.EREDTO;
import com.supervision.pdfqaserver.dto.TruncateDTO;
import java.util.List;
/** /**
* *
@ -14,12 +16,16 @@ public interface KnowledgeGraphService {
*/ */
void generateGraph(String documentId); void generateGraph(String documentId);
void generateGraph(List<EREDTO> eredtoList);
List<EREDTO> truncateERE(List<TruncateDTO> truncateDTOS);
/** /**
* *
* @param documentId ID * @param pdfId pdfId
*/ */
void resetGraphData(String documentId); void resetGraphData(String pdfId);
/** /**
* *
@ -31,4 +37,6 @@ public interface KnowledgeGraphService {
void saveERE(EREDTO eredto, String truncationId); void saveERE(EREDTO eredto, String truncationId);
List<EREDTO> listPdfEREDTO(String pdfId);
} }

@ -15,4 +15,6 @@ public interface TruncationEntityExtractionService extends IService<TruncationEn
void saveERE(List<EntityExtractionDTO> entities); void saveERE(List<EntityExtractionDTO> entities);
void deleteByTruncationId(String truncationId); void deleteByTruncationId(String truncationId);
List<TruncationEntityExtraction> queryByTruncationIds(List<String> truncationIds);
} }

@ -14,4 +14,6 @@ public interface TruncationErAttributeService extends IService<TruncationErAttri
void deleteByTerId(String terId); void deleteByTerId(String terId);
void deleteByTerIds(List<String> terIds); void deleteByTerIds(List<String> terIds);
List<TruncationErAttribute> queryByTerIds(List<String> terIds);
} }

@ -16,4 +16,5 @@ public interface TruncationRelationExtractionService extends IService<Truncation
void deleteByTruncationId(String truncationId); void deleteByTruncationId(String truncationId);
List<TruncationRelationExtraction> queryByTruncationIds(List<String> documentIds);
} }

@ -19,7 +19,8 @@ public class ChinesEsToEnglishGeneratorImpl implements ChinesEsToEnglishGenerato
public String generate(String chinese) { public String generate(String chinese) {
log.info("generate:开始翻译: {}",chinese); log.info("generate:开始翻译: {}",chinese);
String prompt = PromptCache.promptMap.get(CHINESE_TO_ENGLISH); String prompt = PromptCache.promptMap.get(CHINESE_TO_ENGLISH);
String response = ollamaChatModel.call(StrUtil.format(prompt, chinese)); String format = StrUtil.format(prompt, chinese);
String response = ollamaChatModel.call(format);
log.info("generate:chinese:{}翻译结果: {}",chinese,response); log.info("generate:chinese:{}翻译结果: {}",chinese,response);
return response; return response;
} }

@ -10,6 +10,7 @@ import com.supervision.pdfqaserver.mapper.DocumentTruncationMapper;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
@ -42,10 +43,31 @@ public class DocumentTruncationServiceImpl extends ServiceImpl<DocumentTruncatio
this.lambdaUpdate().eq(DocumentTruncation::getDocumentId, documentId).remove(); this.lambdaUpdate().eq(DocumentTruncation::getDocumentId, documentId).remove();
} }
/**
 * Removes every truncation record whose document id is contained in {@code documentIds}.
 * Does nothing when the list is {@code null} or empty.
 *
 * @param documentIds ids of the documents whose truncations should be removed
 */
@Override
public void deleteByDocumentIds(List<String> documentIds) {
    // An empty IN (...) clause must never reach the database, so only act on real input.
    if (CollUtil.isNotEmpty(documentIds)) {
        this.lambdaUpdate()
                .in(DocumentTruncation::getDocumentId, documentIds)
                .remove();
    }
}
@Override @Override
public List<DocumentTruncation> queryByDocumentId(String documentId) { public List<DocumentTruncation> queryByDocumentId(String documentId) {
return this.lambdaQuery().eq(DocumentTruncation::getDocumentId, documentId).list(); return this.lambdaQuery().eq(DocumentTruncation::getDocumentId, documentId).list();
} }
/**
 * Loads all truncation records belonging to any of the given documents.
 *
 * @param documentIds ids of the documents to look up
 * @return the matching records, or a new empty list when {@code documentIds} is
 *         {@code null} or empty
 */
@Override
public List<DocumentTruncation> queryByDocumentIds(List<String> documentIds) {
    if (CollUtil.isNotEmpty(documentIds)) {
        return this.lambdaQuery()
                .in(DocumentTruncation::getDocumentId, documentIds)
                .list();
    }
    // Nothing to match against: skip the query entirely.
    return new ArrayList<>();
}
/**
 * Placeholder for looking up truncations of the given documents that have not been
 * through entity/relation extraction yet.
 *
 * <p>The lookup is not implemented. Until it is, an empty list is returned instead of
 * {@code null}, so callers can iterate the result safely and the method behaves like
 * {@link #queryByDocumentIds(List)} does for empty input.
 *
 * @param documentIds ids of the documents to inspect (currently ignored)
 * @return always a new, empty, mutable list
 */
@Override
public List<DocumentTruncation> queryNotERETruncate(List<String> documentIds) {
    // TODO: implement the real query; the selection criteria are not defined in this class yet.
    return new ArrayList<>();
}
} }

@ -2,13 +2,12 @@ package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.TimeInterval; import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.util.NumberUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum; import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords; import com.supervision.pdfqaserver.domain.*;
import com.supervision.pdfqaserver.domain.DocumentTruncation;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.dto.*; import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import com.supervision.pdfqaserver.service.*; import com.supervision.pdfqaserver.service.*;
import com.supervision.pdfqaserver.thread.KnowledgeGraphGenerateTreadPool; import com.supervision.pdfqaserver.thread.KnowledgeGraphGenerateTreadPool;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
@ -18,6 +17,7 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.Collectors;
@Slf4j @Slf4j
@Service @Service
@ -38,6 +38,10 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
private final TruncationEntityExtractionService truncationEntityExtractionService; private final TruncationEntityExtractionService truncationEntityExtractionService;
private final TruncationRelationExtractionService truncationRelationExtractionService;
private final TruncationErAttributeService truncationErAttributeService;
private final TruncationRelationExtractionService relationExtractionService; private final TruncationRelationExtractionService relationExtractionService;
private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator; private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
@ -65,24 +69,15 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
// 对切分后的文档进行命名实体识别 // 对切分后的文档进行命名实体识别
timer.start("doEre"); timer.start("doEre");
log.info("开始命名实体识别..."); log.info("开始命名实体识别...");
List<EREDTO> eredtoList = new ArrayList<>(); List<EREDTO> eredtoList = truncateERE(truncateDTOS);
for (TruncateDTO truncateDTO : truncateDTOS) {
EREDTO eredto = null;
try {
eredto = tripleConversionPipeline.doEre(truncateDTO);
} catch (Exception e) {
log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
}
if (null == eredto){
continue;
}
// 保存实体关系抽取结果
this.saveERE(eredto, truncateDTO.getId());
eredtoList.add(eredto);
}
log.info("命名实体识别完成,耗时:{}秒", timer.intervalSecond("doEre")); log.info("命名实体识别完成,耗时:{}秒", timer.intervalSecond("doEre"));
// 合并实体关系抽取结果 generateGraph(eredtoList);
}
@Override
public void generateGraph(List<EREDTO> eredtoList) {
log.info("开始合并实体关系抽取结果..."); log.info("开始合并实体关系抽取结果...");
List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList); List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
log.info("合并实体关系抽取结果完成,合并后个数:{}", mergedList.size()); log.info("合并实体关系抽取结果完成,合并后个数:{}", mergedList.size());
@ -137,23 +132,100 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
if (CollUtil.isEmpty(eredto.getEntities()) && CollUtil.isEmpty(eredto.getRelations())){ if (CollUtil.isEmpty(eredto.getEntities()) && CollUtil.isEmpty(eredto.getRelations())){
continue; continue;
} }
// 构造一个字典
allWords = getChineseEnglishWords(eredto);
eredto.setEn(allWords); eredto.setEn(allWords);
tripleToCypherExecutor.saveERE(eredto); try {
tripleToCypherExecutor.saveERE(eredto);
} catch (Exception e) {
log.info("生成cypher语句失败,切分文档id:{}", JSONUtil.toJsonStr(eredto), e);
}
} }
}
/**
 * Builds the word dictionary for a single extraction result.
 *
 * <p>Every term is mapped to itself: the "English" word of each entry is set to the
 * same string as the "Chinese" word. Entries are produced in this order:
 * <ol>
 *   <li>for each entity: one entry per attribute name, then the entity's type;</li>
 *   <li>for each relation: one entry per attribute name, then the relation name,
 *       the source type and the target type.</li>
 * </ol>
 * Duplicates are not removed.
 *
 * @param eredto the extraction result; its entity, relation and attribute lists must be non-null
 * @return a new mutable list with one entry per term
 */
private static List<ChineseEnglishWords> getChineseEnglishWords(EREDTO eredto) {
    // Use an explicit ArrayList: Collectors.toList() gives no mutability guarantee,
    // and this list is appended to throughout.
    List<ChineseEnglishWords> allWords = new ArrayList<>();

    eredto.getEntities().forEach(entity -> {
        entity.getAttributes().forEach(attribute -> allWords.add(identityWord(attribute.getAttribute())));
        allWords.add(identityWord(entity.getEntity()));
    });

    eredto.getRelations().forEach(relation -> {
        relation.getAttributes().forEach(attribute -> allWords.add(identityWord(attribute.getAttribute())));
        allWords.add(identityWord(relation.getRelation()));
        allWords.add(identityWord(relation.getSourceType()));
        allWords.add(identityWord(relation.getTargetType()));
    });

    return allWords;
}

/** Creates a dictionary entry whose Chinese and English words are both {@code term}. */
private static ChineseEnglishWords identityWord(String term) {
    ChineseEnglishWords word = new ChineseEnglishWords();
    word.setChineseWord(term);
    word.setEnglishWord(term);
    return word;
}
/**
 * Runs entity/relation extraction over each truncated fragment, one after another.
 *
 * <p>For every fragment a progress line is logged, then the pipeline's {@code doEre}
 * is invoked. A fragment whose extraction throws is logged and skipped, as is one that
 * yields {@code null}. Every successful result is saved through {@code saveERE} before
 * being added to the returned list.
 *
 * @param truncateDTOS fragments to process; must not be {@code null}
 * @return the extraction results of the fragments that succeeded, in input order
 */
@Override
public List<EREDTO> truncateERE(List<TruncateDTO> truncateDTOS) {
List<EREDTO> eredtoList = new ArrayList<>();
int truncateSize = truncateDTOS.size();
// 1-based position of the current fragment, used only for the progress percentage.
int index = 1;
for (TruncateDTO truncateDTO : truncateDTOS) {
log.info("开始命名实体识别,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((index*1.0)/truncateSize, 2));
index++;
EREDTO eredto = null;
try {
eredto = tripleConversionPipeline.doEre(truncateDTO);
} catch (Exception e) {
// One failing fragment must not abort the whole run; log it and move on.
log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
}
if (null == eredto){
continue;
}
// Save the entity/relation extraction result.
this.saveERE(eredto, truncateDTO.getId());
eredtoList.add(eredto);
}
return eredtoList;
} }
@Override @Override
@Transactional(rollbackFor = Exception.class) @Transactional(rollbackFor = Exception.class)
public void resetGraphData(String documentId) { public void resetGraphData(String pdfId) {
log.info("resetGraphData:重置知识图谱数据,documentId:{}", documentId); log.info("resetGraphData:重置知识图谱数据,pdfId:{}", pdfId);
List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentId(documentId); List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
if (CollUtil.isEmpty(pdfAnalysisOutputs)){
log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
return;
}
List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> String.valueOf(p.getId())).toList();
List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
if (CollUtil.isEmpty(documentTruncations)){ if (CollUtil.isEmpty(documentTruncations)){
log.info("没有找到文档切分数据,documentId:{},不用重置数据...", documentId); log.info("没有找到文档切分数据,pdfId:{},不用重置数据...", pdfId);
return; return;
} }
// 删除切分数据 // 删除切分数据
documentTruncationService.deleteByDocumentId(documentId); documentTruncationService.deleteByDocumentIds(documentIds);
for (DocumentTruncation documentTruncation : documentTruncations) { for (DocumentTruncation documentTruncation : documentTruncations) {
String truncationId = documentTruncation.getId(); String truncationId = documentTruncation.getId();
// 删除实体数据 // 删除实体数据
@ -161,7 +233,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
// 删除关系数据 // 删除关系数据
relationExtractionService.deleteByTruncationId(truncationId); relationExtractionService.deleteByTruncationId(truncationId);
} }
log.info("重置知识图谱数据完成,documentId:{}", documentId); log.info("重置知识图谱数据完成,pdfId:{}", pdfId);
} }
@ -214,4 +286,46 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
relationExtractionService.saveERE(eredto.getRelations()); relationExtractionService.saveERE(eredto.getRelations());
} }
/**
 * Rebuilds the stored extraction results of one PDF from the database.
 *
 * <p>The lookup chain is: PDF analysis outputs, then their truncations, then the
 * entity and relation extraction rows, then the attribute rows. Each entity row and
 * each relation row becomes its own {@link EREDTO} holding exactly that one entity or
 * relation together with its attributes. All entities come first, followed by all
 * relations.
 *
 * @param pdfId the PDF id as a decimal string
 * @return one {@link EREDTO} per stored entity and relation; an empty list when the
 *         PDF has no analysis output
 * @throws NumberFormatException if {@code pdfId} is not a parsable integer
 */
@Override
public List<EREDTO> listPdfEREDTO(String pdfId) {
    List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
    if (CollUtil.isEmpty(pdfAnalysisOutputs)){
        log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
        return new ArrayList<>();
    }

    List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> p.getId().toString()).toList();
    List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
    List<String> truncationIds = documentTruncations.stream().map(DocumentTruncation::getId).toList();

    List<TruncationEntityExtraction> truncationEntityExtractions = truncationEntityExtractionService.queryByTruncationIds(truncationIds);
    List<TruncationRelationExtraction> truncationRelationExtractions = truncationRelationExtractionService.queryByTruncationIds(truncationIds);

    // Attributes hang off both relations and entities, so fetch them in a single query.
    // Use an explicit ArrayList, since Collectors.toList() does not guarantee a mutable result.
    List<String> terIds = new ArrayList<>();
    truncationRelationExtractions.forEach(relation -> terIds.add(relation.getId()));
    truncationEntityExtractions.forEach(entity -> terIds.add(entity.getId()));
    List<TruncationErAttribute> truncationErAttributes = truncationErAttributeService.queryByTerIds(terIds);

    // Index the attributes by owner id once, instead of rescanning the whole list for
    // every entity and relation. HashMap accepts a null key, so null ids still match null ids.
    java.util.Map<String, List<ERAttributeDTO>> attributesByTerId = new java.util.HashMap<>();
    for (TruncationErAttribute attribute : truncationErAttributes) {
        attributesByTerId
                .computeIfAbsent(attribute.getTerId(), key -> new ArrayList<>())
                .add(new ERAttributeDTO(attribute));
    }

    List<EREDTO> eres = new ArrayList<>();
    for (TruncationEntityExtraction entityExtraction : truncationEntityExtractions) {
        EREDTO eredto = new EREDTO();
        EntityExtractionDTO extractionDTO = new EntityExtractionDTO(entityExtraction);
        // Copy so that every DTO owns an independent, mutable attribute list.
        List<ERAttributeDTO> attributes = new ArrayList<>(attributesByTerId.getOrDefault(entityExtraction.getId(), List.of()));
        extractionDTO.setAttributes(attributes);
        eredto.getEntities().add(extractionDTO);
        eres.add(eredto);
    }
    for (TruncationRelationExtraction relationExtraction : truncationRelationExtractions) {
        EREDTO eredto = new EREDTO();
        RelationExtractionDTO extractionDTO = new RelationExtractionDTO(relationExtraction);
        List<ERAttributeDTO> attributes = new ArrayList<>(attributesByTerId.getOrDefault(relationExtraction.getId(), List.of()));
        extractionDTO.setAttributes(attributes);
        eredto.getRelations().add(extractionDTO);
        eres.add(eredto);
    }
    return eres;
}
} }

@ -3,6 +3,7 @@ package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert; import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.BooleanUtil; import cn.hutool.core.util.BooleanUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.cache.PromptCache; import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.constant.LayoutTypeEnum; import com.supervision.pdfqaserver.constant.LayoutTypeEnum;
@ -62,9 +63,40 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
truncateDTOS.add(truncateDTO); truncateDTOS.add(truncateDTO);
} }
} else if (LayoutTypeEnum.TABLE.getCode() == layoutType) { } else if (LayoutTypeEnum.TABLE.getCode() == layoutType) {
// 如果是表格类型的布局,直接添加到列表中 // 如果是表格类型的布局,进行切分
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTOS.add(truncateDTO); // 提前抽取表名
TableTitleDTO tableTitleDTO = this.extractTableTitle(documentDTO.getTitle());
if (null != tableTitleDTO && StrUtil.isNotEmpty(tableTitleDTO.getTitle())){
documentDTO.setTitle(tableTitleDTO.getTitle());
}else {
// 生成一个默认的表
documentDTO.setTitle("tableName-"+ RandomUtil.randomString(10));
}
List<String> tableRows = StrUtil.split(documentDTO.getContent(), "\n").stream().filter(StrUtil::isNotEmpty).collect(Collectors.toList());
if (tableRows.size()<5){
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTOS.add(truncateDTO);
continue;
}
String tableTitle = tableRows.get(0);
// 标题分割符
String tableTitleSplit = tableRows.get(1);
List<String> noTitleRows = tableRows.subList(2,tableRows.size()-1);
List<List<String>> rows = CollUtil.split(noTitleRows, 4);
for (List<String> row : rows) {
StringBuilder sb = new StringBuilder();
sb.append(tableTitle).append("\n");
sb.append(tableTitleSplit).append("\n");
for (String s : row) {
sb.append(s).append("\n");
}
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTO.setContent(sb.toString());
truncateDTOS.add(truncateDTO);
}
} else { } else {
log.info("sliceDocuments:错误的布局类型: {}", layoutType); log.info("sliceDocuments:错误的布局类型: {}", layoutType);
} }
@ -89,9 +121,10 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (classify){ if (classify){
return doTextEre(truncateDTO); return doTextEre(truncateDTO);
} }
return doTableEre(truncateDTO); return doTableEre(truncateDTO);
} }
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType()); log.warn("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
return null; return null;
} }
@ -118,7 +151,14 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
@Override @Override
public TableTitleDTO extractTableTitle(String content) { public TableTitleDTO extractTableTitle(String content) {
return null; TableTitleDTO tableTitleDTO = new TableTitleDTO();
if (StrUtil.isEmpty(content)){
log.warn("extractTableTitle:内容为空");
return tableTitleDTO;
}
String table = PromptCache.promptMap.get(PromptCache.EXTRACT_TABLE_TITLE);
tableTitleDTO.setTitle(table);
return tableTitleDTO;
} }
private EREDTO doTextEre(TruncateDTO truncateDTO) { private EREDTO doTextEre(TruncateDTO truncateDTO) {
@ -140,6 +180,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
// 手动设置表格标题 // 手动设置表格标题
EntityExtractionDTO titleEntity = new EntityExtractionDTO(); EntityExtractionDTO titleEntity = new EntityExtractionDTO();
titleEntity.setEntity("表"); titleEntity.setEntity("表");
titleEntity.setTruncationId(truncateDTO.getId());
titleEntity.setName(truncateDTO.getTitle()); titleEntity.setName(truncateDTO.getTitle());
// 添加关系 // 添加关系
List<RelationExtractionDTO> relations = new ArrayList<>(); List<RelationExtractionDTO> relations = new ArrayList<>();

@ -62,6 +62,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
Map<String, Object> attributes = entity.getAttributes().stream().collect(Collectors.toMap( Map<String, Object> attributes = entity.getAttributes().stream().collect(Collectors.toMap(
ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue
)); ));
attributes.put("truncationId", entity.getTruncationId());
attributes.put("name", entity.getName()); attributes.put("name", entity.getName());
log.info("保存节点{},属性:{}", entity.getEntityEn(),JSONUtil.toJsonStr(entity.getAttributes())); log.info("保存节点{},属性:{}", entity.getEntityEn(),JSONUtil.toJsonStr(entity.getAttributes()));
List<Long> nodeIds = neo4jRepository.saveOrUpdateEntityNode(entity.getEntityEn(), "name", attributes); List<Long> nodeIds = neo4jRepository.saveOrUpdateEntityNode(entity.getEntityEn(), "name", attributes);
@ -86,6 +87,8 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
Map<String, Object> attributes = relation.getAttributes().stream().collect(Collectors.toMap( Map<String, Object> attributes = relation.getAttributes().stream().collect(Collectors.toMap(
ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue
)); ));
attributes.put("sourceType", relation.getSourceType());
attributes.put("truncationId", relation.getTruncationId());
for (Long sourceNodeId : sourceNodeIds) { for (Long sourceNodeId : sourceNodeIds) {
for (Long targetNodeId : targetNodeIds) { for (Long targetNodeId : targetNodeIds) {
if (sourceNodeId.equals(targetNodeId)) { if (sourceNodeId.equals(targetNodeId)) {

@ -13,7 +13,7 @@ import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
@ -65,6 +65,14 @@ public class TruncationEntityExtractionServiceImpl extends ServiceImpl<Truncatio
this.lambdaUpdate().eq(TruncationEntityExtraction::getTruncationId, truncationId).remove(); this.lambdaUpdate().eq(TruncationEntityExtraction::getTruncationId, truncationId).remove();
} }
} }
/**
 * Loads the entity-extraction rows belonging to any of the given truncations.
 *
 * @param truncationIds ids of the truncations to look up
 * @return the matching rows, or a new empty list when {@code truncationIds} is
 *         {@code null} or empty
 */
@Override
public List<TruncationEntityExtraction> queryByTruncationIds(List<String> truncationIds) {
    if (CollUtil.isNotEmpty(truncationIds)) {
        return this.lambdaQuery()
                .in(TruncationEntityExtraction::getTruncationId, truncationIds)
                .list();
    }
    // No ids to match: skip the query.
    return new ArrayList<>();
}
} }

@ -7,6 +7,7 @@ import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.supervision.pdfqaserver.service.TruncationErAttributeService; import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import com.supervision.pdfqaserver.mapper.TruncationErAttributeMapper; import com.supervision.pdfqaserver.mapper.TruncationErAttributeMapper;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
@ -34,6 +35,14 @@ public class TruncationErAttributeServiceImpl extends ServiceImpl<TruncationErAt
} }
this.lambdaUpdate().in(TruncationErAttribute::getTerId, terIds).remove(); this.lambdaUpdate().in(TruncationErAttribute::getTerId, terIds).remove();
} }
/**
 * Fetches the ER-attribute rows whose {@code terId} is contained in the given list.
 *
 * @param terIds ids to match against {@code TruncationErAttribute::getTerId}; when
 *               {@code CollUtil.isEmpty} reports the list as empty, no query is issued
 * @return the matching rows, or a new empty {@link ArrayList} when there is nothing to look up
 */
@Override
public List<TruncationErAttribute> queryByTerIds(List<String> terIds) {
    // Guard first: skip the query entirely when there are no ids to filter on.
    boolean nothingToQuery = CollUtil.isEmpty(terIds);
    if (nothingToQuery) {
        return new ArrayList<>();
    }
    return this.lambdaQuery()
            .in(TruncationErAttribute::getTerId, terIds)
            .list();
}
} }

@ -13,6 +13,7 @@ import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List; import java.util.List;
/** /**
@ -60,6 +61,14 @@ public class TruncationRelationExtractionServiceImpl extends ServiceImpl<Truncat
this.lambdaUpdate().eq(TruncationRelationExtraction::getTruncationId, truncationId).remove(); this.lambdaUpdate().eq(TruncationRelationExtraction::getTruncationId, truncationId).remove();
} }
} }
/**
 * Fetches the relation-extraction rows whose truncation id is contained in the given list.
 *
 * <p>NOTE(review): the parameter is named {@code documentIds}, but its values are matched
 * against {@code TruncationRelationExtraction::getTruncationId}; callers presumably pass
 * truncation ids here, not document ids. Confirm and consider renaming.
 *
 * @param documentIds ids compared with the truncation-id column; when
 *                    {@code CollUtil.isEmpty} reports the list as empty, no query is issued
 * @return the matching rows, or a new empty {@link ArrayList} when there is nothing to look up
 */
@Override
public List<TruncationRelationExtraction> queryByTruncationIds(List<String> documentIds) {
    // Guard first: skip the query entirely when there are no ids to filter on.
    boolean nothingToQuery = CollUtil.isEmpty(documentIds);
    if (nothingToQuery) {
        return new ArrayList<>();
    }
    return this.lambdaQuery()
            .in(TruncationRelationExtraction::getTruncationId, documentIds)
            .list();
}
} }

@ -44,7 +44,7 @@
<maxFileSize>100MB</maxFileSize> <maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy> </timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数--> <!--日志文档保留天数-->
<maxHistory>2</maxHistory> <maxHistory>30</maxHistory>
<totalSizeCap>500MB</totalSizeCap> <totalSizeCap>500MB</totalSizeCap>
</rollingPolicy> </rollingPolicy>
<!-- 此日志文档只记录debug级别的 --> <!-- 此日志文档只记录debug级别的 -->
@ -72,7 +72,7 @@
<maxFileSize>100MB</maxFileSize> <maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy> </timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数--> <!--日志文档保留天数-->
<maxHistory>2</maxHistory> <maxHistory>30</maxHistory>
<totalSizeCap>1GB</totalSizeCap> <totalSizeCap>1GB</totalSizeCap>
</rollingPolicy> </rollingPolicy>
<!-- 此日志文档只记录info级别的 --> <!-- 此日志文档只记录info级别的 -->
@ -99,7 +99,7 @@
<maxFileSize>100MB</maxFileSize> <maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy> </timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数--> <!--日志文档保留天数-->
<maxHistory>2</maxHistory> <maxHistory>30</maxHistory>
<totalSizeCap>500MB</totalSizeCap> <totalSizeCap>500MB</totalSizeCap>
</rollingPolicy> </rollingPolicy>
<!-- 此日志文档只记录warn级别的 --> <!-- 此日志文档只记录warn级别的 -->
@ -126,7 +126,7 @@
<maxFileSize>100MB</maxFileSize> <maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy> </timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数--> <!--日志文档保留天数-->
<maxHistory>2</maxHistory> <maxHistory>30</maxHistory>
<totalSizeCap>500MB</totalSizeCap> <totalSizeCap>500MB</totalSizeCap>
</rollingPolicy> </rollingPolicy>
<!-- 此日志文档只记录ERROR级别的 --> <!-- 此日志文档只记录ERROR级别的 -->

@ -1,5 +1,6 @@
package com.supervision.pdfqaserver; package com.supervision.pdfqaserver;
import com.supervision.pdfqaserver.dto.EREDTO;
import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator; import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator;
import com.supervision.pdfqaserver.service.KnowledgeGraphService; import com.supervision.pdfqaserver.service.KnowledgeGraphService;
import com.supervision.pdfqaserver.service.TripleConversionPipeline; import com.supervision.pdfqaserver.service.TripleConversionPipeline;
@ -22,7 +23,15 @@ class PdfQaServerApplicationTests {
private KnowledgeGraphService knowledgeGraphService; private KnowledgeGraphService knowledgeGraphService;
@Test @Test
void generateGraphTest() { void generateGraphTest() {
knowledgeGraphService.generateGraph("1"); knowledgeGraphService.generateGraph("40");
log.info("finish...");
}
@Test
void testGenerateGraph2() {
List<EREDTO> eredtos = knowledgeGraphService.listPdfEREDTO("17");
knowledgeGraphService.generateGraph(eredtos);
log.info("finish..."); log.info("finish...");
} }

Loading…
Cancel
Save