generateGraph功能优化

master
xueqingkun 1 month ago
parent d0f1147f38
commit 01566bba64

@ -19,6 +19,8 @@ public class PromptCache {
public static final String CLASSIFY_TABLE = "CLASSIFY_TABLE";
public static final String EXTRACT_TABLE_TITLE = "EXTRACT_TABLE_TITLE";
public static final Map<String, String> promptMap = new HashMap<>();
static {
@ -32,6 +34,7 @@ public class PromptCache {
promptMap.put(TEXT_TO_CYPHER, TEXT_TO_CYPHER_PROMPT);
promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT);
promptMap.put(CLASSIFY_TABLE, CLASSIFY_TABLE_PROMPT);
promptMap.put(EXTRACT_TABLE_TITLE, EXTRACT_TABLE_TITLE_PROMPT);
}
@ -152,7 +155,8 @@ public class PromptCache {
1.
2.
3.
4.
4.
5.
****
{
@ -361,4 +365,18 @@ public class PromptCache {
{}
""";
private static final String EXTRACT_TABLE_TITLE_PROMPT = """
****
-
****
-
****
-
****
{}
""";
}

@ -30,11 +30,11 @@ public class OllamaChatModelAspect {
// 获取原始参数
Object[] args = joinPoint.getArgs();
// 如果是String类型的call方法修改其参数
if (StrUtil.equals(signature, callStringMessage) && args.length > 0 && args[0] instanceof String originalPrompt) {
args[0] = originalPrompt + "/no_think";
if (StrUtil.equals(signature, callStringMessage) && args.length > 0) {
args[0] = args[0] + "\n /no_think";
}
// 执行原方法
Object result = joinPoint.proceed();
Object result = joinPoint.proceed(args);
if (StrUtil.equals(model,"qwen3:30b-a3b") ) {
if(StrUtil.equals(signature, callStringMessage)){
result = ((String) result).replaceAll("(?is)<think\\b[^>]*>(.*?)</think>", "").trim();

@ -41,6 +41,15 @@ public class ERAttributeDTO {
public ERAttributeDTO() {
}
/**
 * Creates a DTO that mirrors a persisted attribute row.
 *
 * @param truncationErAttribute the stored attribute whose id, terId, associationType,
 *                              attribute, value and dataType are copied unchanged
 */
public ERAttributeDTO(TruncationErAttribute truncationErAttribute) {
    // Payload: attribute name, its value and the data-type flag.
    this.attribute = truncationErAttribute.getAttribute();
    this.value = truncationErAttribute.getValue();
    this.dataType = truncationErAttribute.getDataType();
    // Identity and ownership: own id, owning entity/relation id, association kind.
    this.id = truncationErAttribute.getId();
    this.terId = truncationErAttribute.getTerId();
    this.associationType = truncationErAttribute.getAssociationType();
}
public ERAttributeDTO(String attribute, String value, String dataType) {
this.attribute = attribute;
this.value = value;

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.dto;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.UUID;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
@ -36,6 +37,11 @@ public class EREDTO {
JSONObject nodeJson = (JSONObject) node;
String name = nodeJson.getString("name");
String type = nodeJson.getString("type");
if (StrUtil.hasBlank(name,type)){
continue;
}
name = StrUtil.trim(name);
type = StrUtil.trim(type);
JSONObject attributes = nodeJson.getJSONObject("attributes");
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
if (CollUtil.isNotEmpty(attributes)){
@ -60,22 +66,33 @@ public class EREDTO {
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
if (CollUtil.isNotEmpty(attributes)){
for (String key : attributes.keySet()) {
if (StrUtil.isBlank(key)){
continue;
}
Object value = attributes.get(key);
if (value instanceof String){
if (StrUtil.isBlank((String) value)){
continue;
}
value = StrUtil.trim((String) value);
}
String valueString = attributes.getString(key);
ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0");
erAttributeDTOS.add(erAttributeDTO);
}
}
if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){
if (StrUtil.isBlank(source) || StrUtil.isBlank(target)){
log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), source)).findFirst();
final String sourceTrim = StrUtil.trim(source);
Optional<EntityExtractionDTO> sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), sourceTrim)).findFirst();
if (sourceTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson);
continue;
}
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), target)).findFirst();
final String targetTrim = StrUtil.trim(target);
Optional<EntityExtractionDTO> targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), targetTrim)).findFirst();
if (targetTypeOpt.isEmpty()){
log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson);
continue;
@ -108,12 +125,22 @@ public class EREDTO {
EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO();
entityExtractionDTO.setEntity("行");
// 避免表格行名重复
entityExtractionDTO.setName("行-" + UUID.randomUUID());
entityExtractionDTO.setName("行-" + RandomUtil.randomString(UUID.randomUUID().toString(), 10));
entityExtractionDTO.setTruncationId(truncationId);
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) {
String key = tableEntry.getKey();
if (StrUtil.isBlank(key)){
continue;
}
key = StrUtil.trim(key);
Object value = tableEntry.getValue();
if (value instanceof String){
if (StrUtil.isBlank(value.toString())){
continue;
}
value = StrUtil.trim((String) value);
}
ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, value.toString(), value instanceof Number ? "1" : "0");
erAttributeDTOS.add(erAttributeDTO);
}

@ -1,5 +1,6 @@
package com.supervision.pdfqaserver.dto;
import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
import lombok.Data;
import java.util.ArrayList;
@ -35,6 +36,13 @@ public class EntityExtractionDTO {
public EntityExtractionDTO() {
}
/**
 * Creates a DTO from a persisted entity-extraction row.
 * Only id, truncationId, entity and name are copied; attributes are not
 * populated by this constructor.
 *
 * @param entityExtraction the stored entity-extraction record to copy from
 */
public EntityExtractionDTO(TruncationEntityExtraction entityExtraction) {
    // What the entity is.
    this.entity = entityExtraction.getEntity();
    this.name = entityExtraction.getName();
    // Where it came from.
    this.id = entityExtraction.getId();
    this.truncationId = entityExtraction.getTruncationId();
}
public EntityExtractionDTO(String truncationId, String entity, String name, List<ERAttributeDTO> attributes) {
this.truncationId = truncationId;
this.entity = entity;

@ -51,7 +51,17 @@ public class RelationExtractionDTO {
public RelationExtractionDTO() {
}
public RelationExtractionDTO(String truncationId,String source, String sourceType,String relation, String target,String targetType, List<ERAttributeDTO> attributes) {
/**
 * Creates a DTO from a persisted relation-extraction row.
 * Copies id, truncationId and the source / relation / target triple together with
 * both endpoint types; attributes are not populated by this constructor.
 *
 * @param relationExtraction the stored relation-extraction record to copy from
 */
public RelationExtractionDTO(TruncationRelationExtraction relationExtraction) {
    // The triple itself.
    this.source = relationExtraction.getSource();
    this.relation = relationExtraction.getRelation();
    this.target = relationExtraction.getTarget();
    // Types of the two endpoints.
    this.sourceType = relationExtraction.getSourceType();
    this.targetType = relationExtraction.getTargetType();
    // Identity and origin.
    this.id = relationExtraction.getId();
    this.truncationId = relationExtraction.getTruncationId();
}
public RelationExtractionDTO(String truncationId, String source, String sourceType, String relation, String target, String targetType, List<ERAttributeDTO> attributes) {
this.truncationId = truncationId;
this.source = source;
this.relation = relation;

@ -18,5 +18,11 @@ public interface DocumentTruncationService extends IService<DocumentTruncation>
/**
 * Removes the truncation rows whose document id equals the given id.
 *
 * @param documentId document id to match
 */
void deleteByDocumentId(String documentId);
/**
 * Removes the truncation rows whose document id is contained in the given list.
 * The implementation in this change does nothing when the list is empty.
 *
 * @param documentIds document ids to match
 */
void deleteByDocumentIds(List<String> documentIds);
/**
 * Lists the truncation rows whose document id equals the given id.
 *
 * @param documentId document id to match
 * @return matching truncation rows
 */
List<DocumentTruncation> queryByDocumentId(String documentId);
/**
 * Lists the truncation rows whose document id is contained in the given list.
 * The implementation in this change returns an empty list for an empty input.
 *
 * @param documentIds document ids to match
 * @return matching truncation rows
 */
List<DocumentTruncation> queryByDocumentIds(List<String> documentIds);
/**
 * NOTE(review): presumably meant to return the truncations of the given documents that
 * have not been through entity/relation extraction yet (inferred from the name only —
 * confirm). The service implementation in this change contains no query logic, so
 * callers should not rely on its result.
 *
 * @param documentIds document ids to inspect
 * @return see note above
 */
List<DocumentTruncation> queryNotERETruncate(List<String> documentIds);
}

@ -1,6 +1,8 @@
package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.dto.EREDTO;
import com.supervision.pdfqaserver.dto.TruncateDTO;
import java.util.List;
/**
*
@ -14,12 +16,16 @@ public interface KnowledgeGraphService {
*/
void generateGraph(String documentId);
/**
 * Builds the graph from extraction results that already exist.
 * The implementation starts by merging the given results through the triple
 * conversion pipeline; the remaining steps are not fully visible here (TODO confirm).
 *
 * @param eredtoList extraction results to merge and turn into graph data
 */
void generateGraph(List<EREDTO> eredtoList);
/**
 * Runs entity/relation extraction on every truncation and persists each non-null result.
 * In the implementation, a truncation whose extraction throws or yields {@code null}
 * is logged/skipped and does not appear in the returned list.
 *
 * @param truncateDTOS truncations to process
 * @return the extraction results that were produced and saved, in input order
 */
List<EREDTO> truncateERE(List<TruncateDTO> truncateDTOS);
/**
*
* @param documentId ID
* @param pdfId pdfId
*/
void resetGraphData(String documentId);
void resetGraphData(String pdfId);
/**
*
@ -31,4 +37,6 @@ public interface KnowledgeGraphService {
void saveERE(EREDTO eredto, String truncationId);
/**
 * Rebuilds extraction results for a PDF from the stored entity, relation and attribute rows.
 * The implementation parses {@code pdfId} with {@code Integer.valueOf} and returns an
 * empty list when the PDF has no analysis output.
 *
 * @param pdfId numeric PDF id, passed as a string
 * @return one {@link EREDTO} per stored entity and one per stored relation
 */
List<EREDTO> listPdfEREDTO(String pdfId);
}

@ -15,4 +15,6 @@ public interface TruncationEntityExtractionService extends IService<TruncationEn
void saveERE(List<EntityExtractionDTO> entities);
void deleteByTruncationId(String truncationId);
/**
 * Lists the entity-extraction rows whose truncation id is contained in the given list.
 * The implementation in this change returns an empty list for an empty input.
 *
 * @param truncationIds truncation ids to match
 * @return matching entity-extraction rows
 */
List<TruncationEntityExtraction> queryByTruncationIds(List<String> truncationIds);
}

@ -14,4 +14,6 @@ public interface TruncationErAttributeService extends IService<TruncationErAttri
void deleteByTerId(String terId);
void deleteByTerIds(List<String> terIds);
/**
 * Lists the attribute rows whose {@code terId} is contained in the given list.
 * The implementation in this change returns an empty list for an empty input.
 *
 * @param terIds owner ids (entity-extraction or relation-extraction ids) to match
 * @return matching attribute rows
 */
List<TruncationErAttribute> queryByTerIds(List<String> terIds);
}

@ -16,4 +16,5 @@ public interface TruncationRelationExtractionService extends IService<Truncation
void deleteByTruncationId(String truncationId);
/**
 * Lists the relation-extraction rows whose truncation id is contained in the given list.
 * NOTE(review): the parameter is named {@code documentIds}, but the implementation
 * matches it against the truncation id, so callers must pass truncation ids.
 *
 * @param documentIds truncation ids to match (despite the name)
 * @return matching relation-extraction rows
 */
List<TruncationRelationExtraction> queryByTruncationIds(List<String> documentIds);
}

@ -19,7 +19,8 @@ public class ChinesEsToEnglishGeneratorImpl implements ChinesEsToEnglishGenerato
public String generate(String chinese) {
log.info("generate:开始翻译: {}",chinese);
String prompt = PromptCache.promptMap.get(CHINESE_TO_ENGLISH);
String response = ollamaChatModel.call(StrUtil.format(prompt, chinese));
String format = StrUtil.format(prompt, chinese);
String response = ollamaChatModel.call(format);
log.info("generate:chinese:{}翻译结果: {}",chinese,response);
return response;
}

@ -10,6 +10,7 @@ import com.supervision.pdfqaserver.mapper.DocumentTruncationMapper;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;
/**
@ -42,10 +43,31 @@ public class DocumentTruncationServiceImpl extends ServiceImpl<DocumentTruncatio
this.lambdaUpdate().eq(DocumentTruncation::getDocumentId, documentId).remove();
}
/**
 * Deletes every truncation row that belongs to one of the given documents.
 * An empty (or null) id list is a no-op, so no unconditional delete is ever issued.
 *
 * @param documentIds ids of the documents whose truncations are removed
 */
@Override
public void deleteByDocumentIds(List<String> documentIds) {
    if (CollUtil.isNotEmpty(documentIds)) {
        this.lambdaUpdate()
                .in(DocumentTruncation::getDocumentId, documentIds)
                .remove();
    }
}
/**
 * Fetches all truncation rows of a single document.
 *
 * @param documentId id of the document to look up
 * @return the truncation rows whose document id equals {@code documentId}
 */
@Override
public List<DocumentTruncation> queryByDocumentId(String documentId) {
    List<DocumentTruncation> truncations = this.lambdaQuery()
            .eq(DocumentTruncation::getDocumentId, documentId)
            .list();
    return truncations;
}
/**
 * Fetches the truncation rows of several documents in one query.
 *
 * @param documentIds ids of the documents to look up
 * @return matching truncation rows; a fresh empty list when no ids are supplied
 */
@Override
public List<DocumentTruncation> queryByDocumentIds(List<String> documentIds) {
    if (CollUtil.isNotEmpty(documentIds)) {
        return this.lambdaQuery()
                .in(DocumentTruncation::getDocumentId, documentIds)
                .list();
    }
    // Nothing to match: skip the database round trip.
    return new ArrayList<>();
}
/**
 * Placeholder for querying truncations that have not been through extraction yet.
 * <p>
 * TODO: not implemented — no query is executed. Until it is, an empty list is returned
 * instead of {@code null}, matching the other list-returning queries of this service
 * so that callers can iterate the result without a null check.
 *
 * @param documentIds ids of the documents to inspect (currently unused)
 * @return always a fresh, empty, mutable list
 */
@Override
public List<DocumentTruncation> queryNotERETruncate(List<String> documentIds) {
    return new ArrayList<>();
}
}

@ -2,13 +2,12 @@ package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.util.NumberUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.supervision.pdfqaserver.domain.DocumentTruncation;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.domain.*;
import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import com.supervision.pdfqaserver.service.*;
import com.supervision.pdfqaserver.thread.KnowledgeGraphGenerateTreadPool;
import lombok.RequiredArgsConstructor;
@ -18,6 +17,7 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
@Slf4j
@Service
@ -38,6 +38,10 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
private final TruncationEntityExtractionService truncationEntityExtractionService;
private final TruncationRelationExtractionService truncationRelationExtractionService;
private final TruncationErAttributeService truncationErAttributeService;
private final TruncationRelationExtractionService relationExtractionService;
private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
@ -65,24 +69,15 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
// 对切分后的文档进行命名实体识别
timer.start("doEre");
log.info("开始命名实体识别...");
List<EREDTO> eredtoList = new ArrayList<>();
for (TruncateDTO truncateDTO : truncateDTOS) {
EREDTO eredto = null;
try {
eredto = tripleConversionPipeline.doEre(truncateDTO);
} catch (Exception e) {
log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
}
if (null == eredto){
continue;
}
// 保存实体关系抽取结果
this.saveERE(eredto, truncateDTO.getId());
eredtoList.add(eredto);
}
List<EREDTO> eredtoList = truncateERE(truncateDTOS);
log.info("命名实体识别完成,耗时:{}秒", timer.intervalSecond("doEre"));
// 合并实体关系抽取结果
generateGraph(eredtoList);
}
@Override
public void generateGraph(List<EREDTO> eredtoList) {
log.info("开始合并实体关系抽取结果...");
List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
log.info("合并实体关系抽取结果完成,合并后个数:{}", mergedList.size());
@ -137,23 +132,100 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
if (CollUtil.isEmpty(eredto.getEntities()) && CollUtil.isEmpty(eredto.getRelations())){
continue;
}
// 构造一个字典
allWords = getChineseEnglishWords(eredto);
eredto.setEn(allWords);
try {
tripleToCypherExecutor.saveERE(eredto);
} catch (Exception e) {
log.info("生成cypher语句失败,切分文档id:{}", JSONUtil.toJsonStr(eredto), e);
}
}
}
/**
 * Builds the word dictionary for one extraction result.
 * Every term is emitted as a pair whose Chinese and English sides are the same text.
 * Order: for each entity its attribute names followed by the entity type; then for each
 * relation its attribute names followed by the relation name, source type and target type.
 *
 * @param eredto extraction result whose entities and relations are scanned
 * @return a new mutable list of word pairs (duplicates are kept)
 */
private static List<ChineseEnglishWords> getChineseEnglishWords(EREDTO eredto) {
    List<ChineseEnglishWords> dictionary = new ArrayList<>();
    for (EntityExtractionDTO entity : eredto.getEntities()) {
        for (ERAttributeDTO attribute : entity.getAttributes()) {
            dictionary.add(identityWord(attribute.getAttribute()));
        }
        dictionary.add(identityWord(entity.getEntity()));
    }
    for (RelationExtractionDTO relation : eredto.getRelations()) {
        for (ERAttributeDTO attribute : relation.getAttributes()) {
            dictionary.add(identityWord(attribute.getAttribute()));
        }
        dictionary.add(identityWord(relation.getRelation()));
        dictionary.add(identityWord(relation.getSourceType()));
        dictionary.add(identityWord(relation.getTargetType()));
    }
    return dictionary;
}

/** Creates a word pair whose Chinese and English sides both hold {@code text}. */
private static ChineseEnglishWords identityWord(String text) {
    ChineseEnglishWords word = new ChineseEnglishWords();
    word.setChineseWord(text);
    word.setEnglishWord(text);
    return word;
}
/**
 * Runs entity/relation extraction over each truncation, logging progress as a percentage.
 * A truncation whose extraction throws is logged and skipped; one that yields {@code null}
 * is skipped silently. Every non-null result is persisted before being collected.
 *
 * @param truncateDTOS truncations to process
 * @return the extraction results that were produced and saved, in input order
 */
@Override
public List<EREDTO> truncateERE(List<TruncateDTO> truncateDTOS) {
    final int total = truncateDTOS.size();
    List<EREDTO> extracted = new ArrayList<>();
    int position = 0;
    for (TruncateDTO truncateDTO : truncateDTOS) {
        position++;
        log.info("开始命名实体识别,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((position * 1.0) / total, 2));
        EREDTO eredto;
        try {
            eredto = tripleConversionPipeline.doEre(truncateDTO);
        } catch (Exception e) {
            log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
            continue;
        }
        if (eredto != null) {
            // Persist the extraction result before handing it on.
            this.saveERE(eredto, truncateDTO.getId());
            extracted.add(eredto);
        }
    }
    return extracted;
}
@Override
@Transactional(rollbackFor = Exception.class)
public void resetGraphData(String documentId) {
log.info("resetGraphData:重置知识图谱数据,documentId:{}", documentId);
List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentId(documentId);
public void resetGraphData(String pdfId) {
log.info("resetGraphData:重置知识图谱数据,pdfId:{}", pdfId);
List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
if (CollUtil.isEmpty(pdfAnalysisOutputs)){
log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
return;
}
List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> String.valueOf(p.getId())).toList();
List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
if (CollUtil.isEmpty(documentTruncations)){
log.info("没有找到文档切分数据,documentId:{},不用重置数据...", documentId);
log.info("没有找到文档切分数据,pdfId:{},不用重置数据...", pdfId);
return;
}
// 删除切分数据
documentTruncationService.deleteByDocumentId(documentId);
documentTruncationService.deleteByDocumentIds(documentIds);
for (DocumentTruncation documentTruncation : documentTruncations) {
String truncationId = documentTruncation.getId();
// 删除实体数据
@ -161,7 +233,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
// 删除关系数据
relationExtractionService.deleteByTruncationId(truncationId);
}
log.info("重置知识图谱数据完成,documentId:{}", documentId);
log.info("重置知识图谱数据完成,pdfId:{}", pdfId);
}
@ -214,4 +286,46 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
relationExtractionService.saveERE(eredto.getRelations());
}
/**
 * Rebuilds the extraction results of a PDF from what is already stored.
 * <p>
 * Lookup chain: PDF analysis outputs, then their truncations, then the entity and
 * relation rows of those truncations, then the attribute rows owned by them. Each stored
 * entity and each stored relation is wrapped in its own {@link EREDTO}, with its
 * attributes attached.
 * <p>
 * Attribute rows are grouped by owner id once, instead of re-scanning the whole attribute
 * list for every entity and relation.
 *
 * @param pdfId numeric PDF id as a string
 * @return one result per entity followed by one per relation; an empty list when the PDF
 *         has no analysis output or no truncations
 * @throws NumberFormatException if {@code pdfId} is not a valid integer
 */
@Override
public List<EREDTO> listPdfEREDTO(String pdfId) {
    List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(pdfId));
    if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
        log.info("没有找到pdfId为{}的pdf分析结果", pdfId);
        return new ArrayList<>();
    }
    List<String> documentIds = pdfAnalysisOutputs.stream().map(p -> p.getId().toString()).toList();
    List<DocumentTruncation> documentTruncations = documentTruncationService.queryByDocumentIds(documentIds);
    if (CollUtil.isEmpty(documentTruncations)) {
        // Without truncations there can be no entities, relations or attributes.
        return new ArrayList<>();
    }
    List<String> truncationIds = documentTruncations.stream().map(DocumentTruncation::getId).toList();

    List<TruncationEntityExtraction> truncationEntityExtractions = truncationEntityExtractionService.queryByTruncationIds(truncationIds);
    List<TruncationRelationExtraction> truncationRelationExtractions = truncationRelationExtractionService.queryByTruncationIds(truncationIds);

    // Attributes of entities and relations share one table keyed by the owner id (terId).
    List<String> terIds = truncationRelationExtractions.stream().map(TruncationRelationExtraction::getId).collect(Collectors.toList());
    truncationEntityExtractions.stream().map(TruncationEntityExtraction::getId).forEachOrdered(terIds::add);
    List<TruncationErAttribute> truncationErAttributes = truncationErAttributeService.queryByTerIds(terIds);

    // Map<terId, attribute rows>; rows without an owner id cannot belong to any entity or
    // relation and are dropped (groupingBy rejects null keys).
    var attributeRowsByTerId = truncationErAttributes.stream()
            .filter(row -> row.getTerId() != null)
            .collect(Collectors.groupingBy(TruncationErAttribute::getTerId));

    List<EREDTO> eres = new ArrayList<>();
    for (TruncationEntityExtraction entityExtraction : truncationEntityExtractions) {
        EntityExtractionDTO extractionDTO = new EntityExtractionDTO(entityExtraction);
        List<ERAttributeDTO> attributes = attributeRowsByTerId
                .getOrDefault(entityExtraction.getId(), List.of())
                .stream().map(ERAttributeDTO::new).collect(Collectors.toList());
        extractionDTO.setAttributes(attributes);
        EREDTO eredto = new EREDTO();
        eredto.getEntities().add(extractionDTO);
        eres.add(eredto);
    }
    for (TruncationRelationExtraction relationExtraction : truncationRelationExtractions) {
        RelationExtractionDTO extractionDTO = new RelationExtractionDTO(relationExtraction);
        List<ERAttributeDTO> attributes = attributeRowsByTerId
                .getOrDefault(relationExtraction.getId(), List.of())
                .stream().map(ERAttributeDTO::new).collect(Collectors.toList());
        extractionDTO.setAttributes(attributes);
        EREDTO eredto = new EREDTO();
        eredto.getRelations().add(extractionDTO);
        eres.add(eredto);
    }
    return eres;
}
}

@ -3,6 +3,7 @@ package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.BooleanUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.constant.LayoutTypeEnum;
@ -62,9 +63,40 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
truncateDTOS.add(truncateDTO);
}
} else if (LayoutTypeEnum.TABLE.getCode() == layoutType) {
// 如果是表格类型的布局,直接添加到列表中
// 如果是表格类型的布局,进行切分
// 提前抽取表名
TableTitleDTO tableTitleDTO = this.extractTableTitle(documentDTO.getTitle());
if (null != tableTitleDTO && StrUtil.isNotEmpty(tableTitleDTO.getTitle())){
documentDTO.setTitle(tableTitleDTO.getTitle());
}else {
// 生成一个默认的表
documentDTO.setTitle("tableName-"+ RandomUtil.randomString(10));
}
List<String> tableRows = StrUtil.split(documentDTO.getContent(), "\n").stream().filter(StrUtil::isNotEmpty).collect(Collectors.toList());
if (tableRows.size()<5){
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTOS.add(truncateDTO);
continue;
}
String tableTitle = tableRows.get(0);
// 标题分割符
String tableTitleSplit = tableRows.get(1);
List<String> noTitleRows = tableRows.subList(2,tableRows.size()-1);
List<List<String>> rows = CollUtil.split(noTitleRows, 4);
for (List<String> row : rows) {
StringBuilder sb = new StringBuilder();
sb.append(tableTitle).append("\n");
sb.append(tableTitleSplit).append("\n");
for (String s : row) {
sb.append(s).append("\n");
}
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTO.setContent(sb.toString());
truncateDTOS.add(truncateDTO);
}
} else {
log.info("sliceDocuments:错误的布局类型: {}", layoutType);
}
@ -89,9 +121,10 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (classify){
return doTextEre(truncateDTO);
}
return doTableEre(truncateDTO);
}
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
log.warn("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
return null;
}
@ -118,7 +151,14 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
@Override
public TableTitleDTO extractTableTitle(String content) {
return null;
TableTitleDTO tableTitleDTO = new TableTitleDTO();
if (StrUtil.isEmpty(content)){
log.warn("extractTableTitle:内容为空");
return tableTitleDTO;
}
String table = PromptCache.promptMap.get(PromptCache.EXTRACT_TABLE_TITLE);
tableTitleDTO.setTitle(table);
return tableTitleDTO;
}
private EREDTO doTextEre(TruncateDTO truncateDTO) {
@ -140,6 +180,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
// 手动设置表格标题
EntityExtractionDTO titleEntity = new EntityExtractionDTO();
titleEntity.setEntity("表");
titleEntity.setTruncationId(truncateDTO.getId());
titleEntity.setName(truncateDTO.getTitle());
// 添加关系
List<RelationExtractionDTO> relations = new ArrayList<>();

@ -62,6 +62,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
Map<String, Object> attributes = entity.getAttributes().stream().collect(Collectors.toMap(
ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue
));
attributes.put("truncationId", entity.getTruncationId());
attributes.put("name", entity.getName());
log.info("保存节点{},属性:{}", entity.getEntityEn(),JSONUtil.toJsonStr(entity.getAttributes()));
List<Long> nodeIds = neo4jRepository.saveOrUpdateEntityNode(entity.getEntityEn(), "name", attributes);
@ -86,6 +87,8 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
Map<String, Object> attributes = relation.getAttributes().stream().collect(Collectors.toMap(
ERAttributeDTO::getAttributeEn, ERAttributeDTO::getValue
));
attributes.put("sourceType", relation.getSourceType());
attributes.put("truncationId", relation.getTruncationId());
for (Long sourceNodeId : sourceNodeIds) {
for (Long targetNodeId : targetNodeIds) {
if (sourceNodeId.equals(targetNodeId)) {

@ -13,7 +13,7 @@ import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
/**
@ -65,6 +65,14 @@ public class TruncationEntityExtractionServiceImpl extends ServiceImpl<Truncatio
this.lambdaUpdate().eq(TruncationEntityExtraction::getTruncationId, truncationId).remove();
}
}
/**
 * Fetches the entity-extraction rows belonging to the given truncations.
 *
 * @param truncationIds ids of the truncations to look up
 * @return matching rows; a fresh empty list when no ids are supplied
 */
@Override
public List<TruncationEntityExtraction> queryByTruncationIds(List<String> truncationIds) {
    if (CollUtil.isNotEmpty(truncationIds)) {
        return this.lambdaQuery()
                .in(TruncationEntityExtraction::getTruncationId, truncationIds)
                .list();
    }
    // Nothing to match: skip the database round trip.
    return new ArrayList<>();
}
}

@ -7,6 +7,7 @@ import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import com.supervision.pdfqaserver.mapper.TruncationErAttributeMapper;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
/**
@ -34,6 +35,14 @@ public class TruncationErAttributeServiceImpl extends ServiceImpl<TruncationErAt
}
this.lambdaUpdate().in(TruncationErAttribute::getTerId, terIds).remove();
}
/**
 * Fetches the attribute rows owned by the given entity/relation extraction ids.
 *
 * @param terIds owner ids to look up
 * @return matching rows; a fresh empty list when no ids are supplied
 */
@Override
public List<TruncationErAttribute> queryByTerIds(List<String> terIds) {
    if (CollUtil.isNotEmpty(terIds)) {
        return this.lambdaQuery()
                .in(TruncationErAttribute::getTerId, terIds)
                .list();
    }
    // Nothing to match: skip the database round trip.
    return new ArrayList<>();
}
}

@ -13,6 +13,7 @@ import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
/**
@ -60,6 +61,14 @@ public class TruncationRelationExtractionServiceImpl extends ServiceImpl<Truncat
this.lambdaUpdate().eq(TruncationRelationExtraction::getTruncationId, truncationId).remove();
}
}
/**
 * Fetches the relation-extraction rows belonging to the given truncations.
 * Despite its name, {@code documentIds} is compared with the truncation id column.
 *
 * @param documentIds truncation ids to look up
 * @return matching rows; a fresh empty list when no ids are supplied
 */
@Override
public List<TruncationRelationExtraction> queryByTruncationIds(List<String> documentIds) {
    if (CollUtil.isNotEmpty(documentIds)) {
        return this.lambdaQuery()
                .in(TruncationRelationExtraction::getTruncationId, documentIds)
                .list();
    }
    // Nothing to match: skip the database round trip.
    return new ArrayList<>();
}
}

@ -44,7 +44,7 @@
<maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数-->
<maxHistory>2</maxHistory>
<maxHistory>30</maxHistory>
<totalSizeCap>500MB</totalSizeCap>
</rollingPolicy>
<!-- 此日志文档只记录debug级别的 -->
@ -72,7 +72,7 @@
<maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数-->
<maxHistory>2</maxHistory>
<maxHistory>30</maxHistory>
<totalSizeCap>1GB</totalSizeCap>
</rollingPolicy>
<!-- 此日志文档只记录info级别的 -->
@ -99,7 +99,7 @@
<maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数-->
<maxHistory>2</maxHistory>
<maxHistory>30</maxHistory>
<totalSizeCap>500MB</totalSizeCap>
</rollingPolicy>
<!-- 此日志文档只记录warn级别的 -->
@ -126,7 +126,7 @@
<maxFileSize>100MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<!--日志文档保留天数-->
<maxHistory>2</maxHistory>
<maxHistory>30</maxHistory>
<totalSizeCap>500MB</totalSizeCap>
</rollingPolicy>
<!-- 此日志文档只记录ERROR级别的 -->

@ -1,5 +1,6 @@
package com.supervision.pdfqaserver;
import com.supervision.pdfqaserver.dto.EREDTO;
import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator;
import com.supervision.pdfqaserver.service.KnowledgeGraphService;
import com.supervision.pdfqaserver.service.TripleConversionPipeline;
@ -22,7 +23,15 @@ class PdfQaServerApplicationTests {
private KnowledgeGraphService knowledgeGraphService;
@Test
void generateGraphTest() {
knowledgeGraphService.generateGraph("1");
knowledgeGraphService.generateGraph("40");
log.info("finish...");
}
/**
 * Manual integration check: reloads the stored extraction results of PDF "17" and
 * regenerates the graph from them, without re-running extraction.
 */
@Test
void testGenerateGraph2() {
    knowledgeGraphService.generateGraph(knowledgeGraphService.listPdfEREDTO("17"));
    log.info("finish...");
}

Loading…
Cancel
Save