generateGraph功能优化

master
xueqingkun 1 month ago
parent 756a05533f
commit 5a75f9b810

@ -3,7 +3,9 @@ package com.supervision.pdfqaserver;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.EnableAspectJAutoProxy;
@EnableAspectJAutoProxy(exposeProxy = true)
@MapperScan(basePackages = {"com.supervision.pdfqaserver.mapper"})
@SpringBootApplication
public class PdfQaServerApplication {

@ -0,0 +1,45 @@
package com.supervision.pdfqaserver.config;
import cn.hutool.core.util.StrUtil;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
/**
 * Aspect that suppresses the "thinking" output of the qwen3 Ollama model for plain
 * {@code ChatModel.call(String)} invocations.
 *
 * <p>When the configured model is {@code qwen3:30b-a3b}, the prompt is suffixed with
 * {@code /no_think} before the call and any {@code <think>...</think>} section is removed
 * from the textual reply. Every other model and every other {@code call(..)} overload is
 * passed through unchanged.
 */
@Aspect
@Component
public class OllamaChatModelAspect {

    /** Model name for which the no-think rewriting is applied. */
    private static final String NO_THINK_MODEL = "qwen3:30b-a3b";

    /** Signature string of the only overload that is rewritten: {@code call(String)}. */
    private static final String CALL_STRING_SIGNATURE =
            "String org.springframework.ai.chat.model.ChatModel.call(String)";

    /** Matches a complete think section (case-insensitive, spanning lines); compiled once. */
    private static final java.util.regex.Pattern THINK_BLOCK =
            java.util.regex.Pattern.compile("(?is)<think\\b[^>]*>(.*?)</think>");

    @Value("${spring.ai.ollama.chat.model}")
    private String model;

    /**
     * Wraps {@code ChatModel.call(..)}: appends {@code /no_think} to the prompt and strips
     * the think section from the reply when the configured model and the invoked overload
     * both match.
     *
     * @param joinPoint the intercepted invocation
     * @return the (possibly cleaned) result of the intercepted call
     * @throws Throwable whatever the intercepted call throws
     */
    @Around("execution(* org.springframework.ai.chat.model.ChatModel.call(..))")
    public Object aroundMethodExecution(ProceedingJoinPoint joinPoint) throws Throwable {
        String signature = joinPoint.getSignature().toString();
        boolean rewrite = StrUtil.equals(model, NO_THINK_MODEL)
                && StrUtil.equals(signature, CALL_STRING_SIGNATURE);
        if (!rewrite) {
            return joinPoint.proceed();
        }

        Object[] args = joinPoint.getArgs();
        if (args.length > 0 && args[0] instanceof String prompt) {
            args[0] = prompt + "\n /no_think";
        }
        // The modified array must be handed to proceed(..) explicitly; a bare proceed()
        // re-uses the original arguments and the suffix would be silently dropped.
        Object result = joinPoint.proceed(args);

        // Guard against a null or non-String result instead of failing on a blind cast.
        if (result instanceof String reply) {
            return THINK_BLOCK.matcher(reply).replaceAll("").trim();
        }
        return result;
    }
}

@ -0,0 +1,36 @@
package com.supervision.pdfqaserver.controller;
import com.supervision.pdfqaserver.dto.R;
import com.supervision.pdfqaserver.service.KnowledgeGraphService;
import com.supervision.pdfqaserver.vo.PdfToMdFinishReqVo;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST endpoint that receives notifications from the PDF-to-Markdown conversion step.
 */
@Slf4j
@RestController
@RequestMapping("/chat")
@RequiredArgsConstructor
public class NotifyController {

    private final KnowledgeGraphService knowledgeGraphService;

    /**
     * Callback invoked when a PDF conversion has finished. A knowledge-graph generation
     * task is submitted only when the reported status equals {@code "2"}.
     *
     * @param pdfToMdFinishReqVo callback payload carrying the PDF id and its process status
     * @return a response whose payload is always {@code "success"}
     */
    @PostMapping("/pdf-to-md/finish")
    public R<String> pageList(@RequestBody PdfToMdFinishReqVo pdfToMdFinishReqVo) {
        String pdfId = pdfToMdFinishReqVo.getPfdId();
        String status = pdfToMdFinishReqVo.getProcessStatus();
        log.info("pdf转化完成通知回调, pdfId:{}, processStatus:{}", pdfId, status);

        boolean readyForGraph = "2".equals(status);
        if (readyForGraph) {
            knowledgeGraphService.submitGenerateTask(pdfId);
        }
        return R.ok("success");
    }
}

@ -28,6 +28,32 @@ public class PdfInfo implements Serializable {
*/
private String filename;
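/**
* Processing status code (values 0-6). As used by PdfInfoServiceImpl: 2 = waiting for graph
* generation, 4 = graph extraction started, 5 = extraction completed, 6 = extraction failed.
* NOTE(review): the meanings of 0, 1 and 3 are not visible in this change - confirm and document.
*/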
private int processStatus;
/**
*
*/
private LocalDateTime analysisStartTime;
/**
*
*/
private LocalDateTime analysisEndTime;
/**
*
*/
private LocalDateTime extractionStartTime;
/**
*
*/
private LocalDateTime extractionEndTime;
/**
*
*/

@ -1,13 +1,13 @@
package com.supervision.pdfqaserver.dto;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.UUID;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import java.util.*;
/**
@ -107,7 +107,8 @@ public class EREDTO {
}
EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO();
entityExtractionDTO.setEntity("行");
entityExtractionDTO.setName("行");
// 避免表格行名重复
entityExtractionDTO.setName("行-" + UUID.randomUUID());
entityExtractionDTO.setTruncationId(truncationId);
List<ERAttributeDTO> erAttributeDTOS = new ArrayList<>();
for (Map.Entry<String, Object> tableEntry : tableJson.entrySet()) {

@ -15,4 +15,8 @@ public interface DocumentTruncationService extends IService<DocumentTruncation>
void batchSave(List<TruncateDTO> truncateDTOS);
/**
* Removes every truncation record that belongs to the given document.
* DocumentTruncationServiceImpl returns without doing anything when the id is null or empty.
*
* @param documentId id of the document whose truncation records are removed
*/
void deleteByDocumentId(String documentId);
/**
* Looks up the truncation records whose document id equals the given value.
*
* @param documentId id of the document to look up
* @return the matching truncation records
*/
List<DocumentTruncation> queryByDocumentId(String documentId);
}

@ -14,6 +14,19 @@ public interface KnowledgeGraphService {
*/
void generateGraph(String documentId);
/**
* Clears previously generated data for a document so the graph can be generated again.
* KnowledgeGraphServiceImpl deletes the document's truncation records and, for each of them,
* the entity and relation extraction rows; it does nothing when no truncation exists.
*
* @param documentId id of the document whose data is reset
*/
void resetGraphData(String documentId);
/**
* Submits an asynchronous knowledge-graph generation task for a document.
* KnowledgeGraphServiceImpl runs the task on the graph-generation thread pool and records
* start, completion or failure of the run through PdfInfoService.
*
* @param documentId id of the document to generate the graph for
*/
void submitGenerateTask(String documentId);
void queryGraph(String databaseId, String query);

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.PdfInfo;
import com.baomidou.mybatisplus.extension.service.IService;
import java.util.List;
/**
* @author Administrator
@ -10,4 +11,12 @@ import com.baomidou.mybatisplus.extension.service.IService;
*/
public interface PdfInfoService extends IService<PdfInfo> {
/**
* Marks graph extraction as started for the PDF.
* PdfInfoServiceImpl sets processStatus to 4 and extractionStartTime to the current time.
*
* @param pdfId id of the PDF record to update
*/
void pdfToGraphStart(String pdfId);
/**
* Marks graph extraction as completed for the PDF.
* PdfInfoServiceImpl sets processStatus to 5 and extractionEndTime to the current time.
*
* @param pdfId id of the PDF record to update
*/
void pdfToGraphComplete(String pdfId);
/**
* Marks graph extraction as failed for the PDF.
* PdfInfoServiceImpl sets processStatus to 6 and extractionEndTime to the current time.
*
* @param pdfId id of the PDF record to update
*/
void pdfToGraphFail(String pdfId);
/**
* Lists PDF records that are waiting for graph generation.
* PdfInfoServiceImpl selects records whose processStatus is 2 and appends a SQL limit clause.
*
* @param limit maximum number of records to return
* @return the matching PDF records
*/
List<PdfInfo> listNeedGenerateGraph(Integer limit);
}

@ -3,7 +3,6 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
import com.baomidou.mybatisplus.extension.service.IService;
import com.supervision.pdfqaserver.dto.EntityExtractionDTO;
import java.util.List;
/**
@ -14,4 +13,6 @@ import java.util.List;
public interface TruncationEntityExtractionService extends IService<TruncationEntityExtraction> {
// NOTE(review): presumably persists the extracted entities; the implementation is not
// visible in this change - confirm before relying on details.
void saveERE(List<EntityExtractionDTO> entities);
/**
* Deletes the entity extraction rows of one truncation.
* TruncationEntityExtractionServiceImpl first deletes the attribute rows that reference those
* entities, and returns without doing anything when the id is null or empty.
*
* @param truncationId id of the truncation whose entities are deleted
*/
void deleteByTruncationId(String truncationId);
}

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.baomidou.mybatisplus.extension.service.IService;
import java.util.List;
/**
* @author Administrator
@ -10,4 +11,7 @@ import com.baomidou.mybatisplus.extension.service.IService;
*/
public interface TruncationErAttributeService extends IService<TruncationErAttribute> {
/**
* Deletes the attribute rows whose terId equals the given value.
* TruncationErAttributeServiceImpl ignores a null or empty id.
*
* @param terId id of the owning entity or relation extraction row
*/
void deleteByTerId(String terId);
/**
* Deletes the attribute rows whose terId is contained in the given list.
* TruncationErAttributeServiceImpl ignores a null or empty list.
*
* @param terIds ids of the owning entity or relation extraction rows
*/
void deleteByTerIds(List<String> terIds);
}

@ -13,4 +13,7 @@ import java.util.List;
public interface TruncationRelationExtractionService extends IService<TruncationRelationExtraction> {
// NOTE(review): presumably persists the extracted relations; the implementation is not
// visible in this change - confirm before relying on details.
void saveERE(List<RelationExtractionDTO> relations);
/**
* Deletes the relation extraction rows of one truncation.
* TruncationRelationExtractionServiceImpl first deletes the attribute rows that reference
* those relations, and returns without doing anything when the id is null or empty.
*
* @param truncationId id of the truncation whose relations are deleted
*/
void deleteByTruncationId(String truncationId);
}

@ -1,12 +1,14 @@
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.DocumentTruncation;
import com.supervision.pdfqaserver.dto.TruncateDTO;
import com.supervision.pdfqaserver.service.DocumentTruncationService;
import com.supervision.pdfqaserver.mapper.DocumentTruncationMapper;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
@ -30,6 +32,20 @@ public class DocumentTruncationServiceImpl extends ServiceImpl<DocumentTruncatio
truncateDTO.setId(documentTruncation.getId());
}
}
/**
 * Removes all truncation records of the given document; a null or empty id is ignored.
 *
 * @param documentId id of the document whose truncation records are removed
 */
@Override
@Transactional(rollbackFor = Exception.class)
public void deleteByDocumentId(String documentId) {
    if (StrUtil.isNotEmpty(documentId)) {
        this.lambdaUpdate()
                .eq(DocumentTruncation::getDocumentId, documentId)
                .remove();
    }
}
/**
 * Fetches the truncation records whose document id equals the given value.
 *
 * @param documentId id of the document to look up
 * @return the matching truncation records
 */
@Override
public List<DocumentTruncation> queryByDocumentId(String documentId) {
    List<DocumentTruncation> truncations = this.lambdaQuery()
            .eq(DocumentTruncation::getDocumentId, documentId)
            .list();
    return truncations;
}
}

@ -5,13 +5,17 @@ import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.supervision.pdfqaserver.domain.DocumentTruncation;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import com.supervision.pdfqaserver.service.*;
import com.supervision.pdfqaserver.thread.KnowledgeGraphGenerateTreadPool;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.aop.framework.AopContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.ArrayList;
import java.util.List;
@ -38,8 +42,11 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
private final PdfInfoService pdfInfoService;
@Override
public void generateGraph(String documentId) {
((KnowledgeGraphService)AopContext.currentProxy()).resetGraphData(documentId);
List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(Integer.valueOf(documentId));
if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
log.info("没有找到pdfId为{}的pdf分析结果", documentId);
@ -131,6 +138,44 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
}
/**
 * Wipes the data produced by an earlier graph generation run for a document: its truncation
 * records plus, per truncation, the extracted entities and relations. Nothing is deleted
 * when the document has no truncation records.
 *
 * @param documentId id of the document whose data is reset
 */
@Override
@Transactional(rollbackFor = Exception.class)
public void resetGraphData(String documentId) {
    log.info("resetGraphData:重置知识图谱数据,documentId:{}", documentId);
    List<DocumentTruncation> staleTruncations = documentTruncationService.queryByDocumentId(documentId);
    if (CollUtil.isEmpty(staleTruncations)) {
        log.info("没有找到文档切分数据,documentId:{},不用重置数据...", documentId);
        return;
    }

    // Remove the truncation records first, then whatever was extracted from each of them.
    documentTruncationService.deleteByDocumentId(documentId);
    staleTruncations.stream()
            .map(DocumentTruncation::getId)
            .forEach(truncationId -> {
                truncationEntityExtractionService.deleteByTruncationId(truncationId);
                relationExtractionService.deleteByTruncationId(truncationId);
            });

    log.info("重置知识图谱数据完成,documentId:{}", documentId);
}
/**
 * Queues knowledge-graph generation for a document on the graph-generation thread pool.
 * The queued task marks the PDF as started, generates the graph and then marks the PDF as
 * completed; any exception is logged and the PDF is marked as failed instead.
 *
 * @param documentId id of the document to generate the graph for
 */
@Override
public void submitGenerateTask(String documentId) {
    log.info("submitGenerateTask:提交知识图谱生成任务,documentId:{}", documentId);
    // generateGraph() relies on AopContext.currentProxy(), which is thread-local and only
    // populated while a call is going through the proxy. Capture the proxy on the submitting
    // thread and call through it from the worker, so the context is set there as well.
    final KnowledgeGraphService self = currentProxyOrSelf();
    KnowledgeGraphGenerateTreadPool.executorService.execute(() -> {
        try {
            pdfInfoService.pdfToGraphStart(documentId);
            self.generateGraph(documentId);
            pdfInfoService.pdfToGraphComplete(documentId);
        } catch (Exception e) {
            log.error("生成知识图谱失败,documentId:{}", documentId, e);
            pdfInfoService.pdfToGraphFail(documentId);
        }
    });
}

/**
 * Returns the AOP proxy of this service when the current call came through it, otherwise
 * this instance itself (the behaviour the method had before the proxy was captured).
 */
private KnowledgeGraphService currentProxyOrSelf() {
    try {
        return (KnowledgeGraphService) AopContext.currentProxy();
    } catch (IllegalStateException notProxied) {
        return this;
    }
}
private void saveWordsIfNecessary(String word, List<ChineseEnglishWords> allWords) {
boolean exists = chineseEnglishWordsService.wordsExists(word, allWords);

@ -5,6 +5,8 @@ import com.supervision.pdfqaserver.domain.PdfInfo;
import com.supervision.pdfqaserver.service.PdfInfoService;
import com.supervision.pdfqaserver.mapper.PdfInfoMapper;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.List;
/**
* @author Administrator
@ -15,6 +17,36 @@ import org.springframework.stereotype.Service;
public class PdfInfoServiceImpl extends ServiceImpl<PdfInfoMapper, PdfInfo>
implements PdfInfoService{
/**
 * Flags the PDF as "graph extraction started": status 4 plus the extraction start time.
 *
 * @param pdfId id of the PDF record to update
 */
@Override
public void pdfToGraphStart(String pdfId) {
    LocalDateTime startedAt = LocalDateTime.now();
    this.lambdaUpdate()
            .set(PdfInfo::getProcessStatus, 4)
            .set(PdfInfo::getExtractionStartTime, startedAt)
            .eq(PdfInfo::getId, pdfId)
            .update();
}
/**
 * Flags the PDF as "graph extraction completed": status 5 plus the extraction end time.
 *
 * @param pdfId id of the PDF record to update
 */
@Override
public void pdfToGraphComplete(String pdfId) {
    LocalDateTime finishedAt = LocalDateTime.now();
    this.lambdaUpdate()
            .set(PdfInfo::getProcessStatus, 5)
            .set(PdfInfo::getExtractionEndTime, finishedAt)
            .eq(PdfInfo::getId, pdfId)
            .update();
}
/**
 * Flags the PDF as "graph extraction failed": status 6 plus the extraction end time.
 *
 * @param pdfId id of the PDF record to update
 */
@Override
public void pdfToGraphFail(String pdfId) {
    LocalDateTime failedAt = LocalDateTime.now();
    this.lambdaUpdate()
            .set(PdfInfo::getProcessStatus, 6)
            .set(PdfInfo::getExtractionEndTime, failedAt)
            .eq(PdfInfo::getId, pdfId)
            .update();
}
/**
 * Lists PDF records that are waiting for graph generation (processStatus 2).
 *
 * @param limit maximum number of records to return; {@code null} means no limit
 * @return the matching PDF records
 */
@Override
public List<PdfInfo> listNeedGenerateGraph(Integer limit) {
    // Only append the clause when a limit is given; a null limit used to render the
    // invalid SQL fragment "limit null".
    boolean bounded = limit != null;
    return this.lambdaQuery()
            .eq(PdfInfo::getProcessStatus, 2)
            .last(bounded, "limit " + limit)
            .list();
}
}

@ -75,11 +75,19 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TEXT.getCode()))){
return doTextEre(truncateDTO);
try {
return doTextEre(truncateDTO);
} catch (Exception e) {
log.error("doEre:文本实体关系抽取失败,内容:{}", truncateDTO.getContent(), e);
}
}
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){
return doTableEre(truncateDTO);
try {
return doTableEre(truncateDTO);
} catch (Exception e) {
log.error("doEre:表格实体关系抽取失败,内容:{}", truncateDTO.getContent(), e);
}
}
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
return null;
@ -90,7 +98,6 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
String prompt = PromptCache.promptMap.get(PromptCache.DOERE_TEXT);
String formatted = StrUtil.format(prompt, truncateDTO.getContent());
String response = ollamaChatModel.call(formatted);
// todo:暂时不去处理异常返回
log.info("doTextEre响应结果:{}", response);
return EREDTO.fromTextJson(response, truncateDTO.getId());
}
@ -101,7 +108,6 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
String formatted = StrUtil.format(prompt, truncateDTO.getContent());
String response = ollamaChatModel.call(formatted);
log.info("doTableEre响应结果:{}", response);
// todo:暂时不去处理异常返回
EREDTO eredto = EREDTO.fromTableJson(response, truncateDTO.getId());
// 手动设置表格标题
EntityExtractionDTO titleEntity = new EntityExtractionDTO();

@ -93,7 +93,7 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
continue;
}
log.info("保存关系{}-{}-{}的属性:{}", relation.getSourceTypeEn(), relation.getRelationEn(),relation.getTargetTypeEn(), attributes);
neo4jRepository.saveOrUpdateRelation(sourceNodeId, targetNodeId, relation.getRelationEn(), false, false, attributes);
List<Long> longs = neo4jRepository.saveOrUpdateRelation(sourceNodeId, targetNodeId, relation.getRelationEn(), false, false, attributes);
}
}
}

@ -1,6 +1,7 @@
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
@ -49,6 +50,21 @@ public class TruncationEntityExtractionServiceImpl extends ServiceImpl<Truncatio
}
}
}
/**
 * Deletes the entity extraction rows of one truncation together with the attribute rows
 * that reference them. A null or empty id, or a truncation without entities, is a no-op.
 *
 * @param truncationId id of the truncation whose entities are deleted
 */
@Override
public void deleteByTruncationId(String truncationId) {
    if (StrUtil.isEmpty(truncationId)) {
        return;
    }
    List<TruncationEntityExtraction> entities = this.lambdaQuery()
            .eq(TruncationEntityExtraction::getTruncationId, truncationId)
            .list();
    if (CollUtil.isEmpty(entities)) {
        return;
    }

    // Attributes hang off the entity ids, so they are removed before the entities.
    List<String> entityIds = entities.stream()
            .map(TruncationEntityExtraction::getId)
            .toList();
    truncationErAttributeService.deleteByTerIds(entityIds);
    this.lambdaUpdate()
            .eq(TruncationEntityExtraction::getTruncationId, truncationId)
            .remove();
}
}

@ -1,10 +1,13 @@
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import com.supervision.pdfqaserver.mapper.TruncationErAttributeMapper;
import org.springframework.stereotype.Service;
import java.util.List;
/**
* @author Administrator
@ -15,6 +18,22 @@ import org.springframework.stereotype.Service;
public class TruncationErAttributeServiceImpl extends ServiceImpl<TruncationErAttributeMapper, TruncationErAttribute>
implements TruncationErAttributeService{
/**
 * Deletes the attribute rows whose terId equals the given value; a null or empty id is ignored.
 *
 * @param terId id of the owning extraction row
 */
@Override
public void deleteByTerId(String terId) {
    if (StrUtil.isNotEmpty(terId)) {
        this.lambdaUpdate()
                .eq(TruncationErAttribute::getTerId, terId)
                .remove();
    }
}
/**
 * Deletes the attribute rows whose terId is one of the given values; a null or empty list
 * is ignored.
 *
 * @param terIds ids of the owning extraction rows
 */
@Override
public void deleteByTerIds(List<String> terIds) {
    if (CollUtil.isNotEmpty(terIds)) {
        this.lambdaUpdate()
                .in(TruncationErAttribute::getTerId, terIds)
                .remove();
    }
}
}

@ -1,6 +1,7 @@
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
@ -12,7 +13,6 @@ import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.List;
/**
@ -46,6 +46,20 @@ public class TruncationRelationExtractionServiceImpl extends ServiceImpl<Truncat
}
}
}
/**
 * Deletes the relation extraction rows of one truncation together with the attribute rows
 * that reference them. A null or empty id, or a truncation without relations, is a no-op.
 *
 * @param truncationId id of the truncation whose relations are deleted
 */
@Override
public void deleteByTruncationId(String truncationId) {
    if (StrUtil.isEmpty(truncationId)) {
        return;
    }
    List<TruncationRelationExtraction> relations = this.lambdaQuery()
            .eq(TruncationRelationExtraction::getTruncationId, truncationId)
            .list();
    if (CollUtil.isEmpty(relations)) {
        return;
    }

    // Attributes hang off the relation ids, so they are removed before the relations.
    List<String> relationIds = relations.stream()
            .map(TruncationRelationExtraction::getId)
            .toList();
    truncationErAttributeService.deleteByTerIds(relationIds);
    this.lambdaUpdate()
            .eq(TruncationRelationExtraction::getTruncationId, truncationId)
            .remove();
}
}

@ -0,0 +1,35 @@
package com.supervision.pdfqaserver.thread;
import cn.hutool.core.thread.ExecutorBuilder;
import cn.hutool.core.thread.ThreadFactoryBuilder;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Holder of the thread pool that runs knowledge-graph generation tasks.
 *
 * <p>The pool is created once the bean has been constructed and is published through the
 * static {@link #executorService} field. Pool sizes come from
 * {@code graph.generate.thread-pool.core} and {@code graph.generate.thread-pool.max}
 * (both default to 1); the work queue holds at most 1024 pending tasks.
 */
@Slf4j
@Component
public class KnowledgeGraphGenerateTreadPool {

    @Value("${graph.generate.thread-pool.core:1}")
    private int core;

    @Value("${graph.generate.thread-pool.max:1}")
    private int max;

    /** Shared executor; assigned in {@link #init()} and shut down in {@link #shutdown()}. */
    public static ExecutorService executorService;

    /**
     * Builds the pool from the configured sizes.
     */
    @PostConstruct
    public void init() {
        // A maximum below the core size (or below 1) makes pool construction fail, so a
        // misconfigured value is raised to the smallest legal one instead of aborting startup.
        int effectiveMax = Math.max(1, Math.max(core, max));
        if (effectiveMax != max) {
            log.warn("graph.generate.thread-pool.max={} is invalid for core={}, using {}", max, core, effectiveMax);
        }
        log.info("knowledgeGraphGenerateTreadPool线程池初始化。线程数{}", core);
        executorService = ExecutorBuilder.create()
                .setCorePoolSize(core).setMaxPoolSize(effectiveMax)
                .setWorkQueue(new LinkedBlockingQueue<>(1024))
                .setThreadFactory(ThreadFactoryBuilder.create().setNamePrefix("graphGenerate").build())
                .build();
    }

    /**
     * Stops accepting new tasks when the bean is destroyed so the pool's worker threads can
     * terminate once the already submitted tasks have finished.
     */
    @PreDestroy
    public void shutdown() {
        ExecutorService pool = executorService;
        if (pool != null) {
            pool.shutdown();
        }
    }
}

@ -0,0 +1,17 @@
package com.supervision.pdfqaserver.vo;
import lombok.Data;
/**
* Request body of the "PDF to Markdown conversion finished" callback handled by NotifyController.
*/
@Data
public class PdfToMdFinishReqVo {
/**
* Id of the PDF the notification refers to.
* NOTE(review): the property is spelled "pfdId" (sic); renaming it would change the generated
* accessor and presumably the expected request field name - coordinate with the caller first.
*/
private String pfdId;
/**
* Processing status reported for the PDF. NotifyController submits graph generation only when
* the value is "2". NOTE(review): the meaning of other values (e.g. "3") is not visible here - confirm.
*/
private String processStatus;
}

@ -8,11 +8,18 @@
<result property="id" column="id" jdbcType="INTEGER"/>
<result property="path" column="path" jdbcType="VARCHAR"/>
<result property="filename" column="filename" jdbcType="VARCHAR"/>
<result property="processStatus" column="process_status" jdbcType="VARCHAR"/>
<result property="analysisStartTime" column="analysis_start_time" jdbcType="TIMESTAMP"/>
<result property="analysisEndTime" column="analysis_end_time" jdbcType="TIMESTAMP"/>
<result property="extractionStartTime" column="extraction_start_time" jdbcType="TIMESTAMP"/>
<result property="extractionEndTime" column="extraction_end_time" jdbcType="TIMESTAMP"/>
<result property="createTime" column="create_time" jdbcType="TIMESTAMP"/>
</resultMap>
<sql id="Base_Column_List">
id,path,filename,
id,path,filename,process_status,
analysis_start_time,analysis_end_time,
extraction_start_time,extraction_end_time,
create_time
</sql>
</mapper>

@ -1,5 +1,6 @@
package com.supervision.pdfqaserver;
import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator;
import com.supervision.pdfqaserver.service.KnowledgeGraphService;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
@ -79,5 +80,14 @@ class PdfQaServerApplicationTests {
return result.stream().map(record -> record.get("id").asLong()).collect(Collectors.toList());
}
}
@Autowired
private ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
/**
 * Smoke test: sends a fixed Chinese word through the generator and prints the result.
 */
@Test
void testChinesEsToEnglishGenerator() {
    String english = chinesEsToEnglishGenerator.generate("你好");
    System.out.println("翻译结果: " + english);
}
}

Loading…
Cancel
Save