初始化代码

master
xueqingkun 9 hours ago
parent b9c957a74a
commit 830acca35d

@ -80,6 +80,11 @@
<artifactId>jackson-databind</artifactId> <artifactId>jackson-databind</artifactId>
<version>2.15.3</version> <version>2.15.3</version>
</dependency> </dependency>
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.5.4</version>
</dependency>
</dependencies> </dependencies>
<dependencyManagement> <dependencyManagement>
<dependencies> <dependencies>

@ -10,6 +10,10 @@ public class PromptCache {
public static final String DOERE_TEXT = "DOERE_TEXT"; public static final String DOERE_TEXT = "DOERE_TEXT";
public static final String DOERE_TABLE = "DOERE_TABLE"; public static final String DOERE_TABLE = "DOERE_TABLE";
/** Key under which the Chinese-to-English prompt template is registered in {@code promptMap}. */
public static final String CHINESE_TO_ENGLISH = "CHINESE_TO_ENGLISH";
/** Key under which the "ERE to insert Cypher" prompt template is registered in {@code promptMap}. */
public static final String ERE_TO_INSERT_CYPHER = "ERE_TO_INSERT_CYPHER";
public static final Map<String, String> promptMap = new HashMap<>(); public static final Map<String, String> promptMap = new HashMap<>();
static { static {
@ -18,6 +22,8 @@ public class PromptCache {
private static void init(){ private static void init(){
promptMap.put(DOERE_TEXT, DOERE_TEXT_PROMPT); promptMap.put(DOERE_TEXT, DOERE_TEXT_PROMPT);
promptMap.put(DOERE_TABLE, DOERE_TABLE_PROMPT); promptMap.put(DOERE_TABLE, DOERE_TABLE_PROMPT);
promptMap.put(CHINESE_TO_ENGLISH, CHINESE_TO_ENGLISH_PROMPT);
promptMap.put(ERE_TO_INSERT_CYPHER, ERE_TO_INSERT_CYPHER_PROMPT);
} }
@ -182,4 +188,51 @@ public class PromptCache {
"""; """;
/**
 * Prompt template registered under {@code CHINESE_TO_ENGLISH}.
 * NOTE(review): the template body appears to be empty here — confirm this is intentional.
 */
private static final String CHINESE_TO_ENGLISH_PROMPT = """
""";
/**
 * Prompt template registered under {@code ERE_TO_INSERT_CYPHER}.
 * It shows the model an example relation list in JSON and the matching Neo4j Cypher
 * {@code MERGE} statements, and ends with a single <code>{}</code> placeholder line.
 * NOTE(review): presumably the caller is expected to substitute the real input for that
 * placeholder before sending the prompt — confirm against the caller.
 */
private static final String ERE_TO_INSERT_CYPHER_PROMPT = """
Neo4jCypher
1. ****`(n:Label {name: "Value"})``Label``Person``Company`
2. ****`[r:RELATION_TYPE]`
3. 使`MERGE`
4. Cypher
###
```json
[
{"source": "人物","sourceType": "Person", "relation": "创始人", "relationType": "FOUNDED","target": "公司","targetType": "Company"},
{"source": "公司","sourceType": "Company ", "relation": "位于", "relationType": "LOCATED_IN","target": "城市","targetType": "City "}
]
```
###
MERGE (p:Person {name: "人物"})
MERGE (c:Company {name: "公司"})
MERGE (city:City {name: "城市"})
MERGE (p)-[r1:FOUNDED]->(c)
MERGE (c)-[r2:LOCATED_IN]->(city)
###
1.
- "人物" `Person`
- "公司" `Company`
- "城市" `City`
2.
- "创始人" `FOUNDED`
- "位于" `LOCATED_IN`
3. `name`
###
1.
2. 使```Person`
3. MERGE
###
{}
""";
} }

@ -0,0 +1,23 @@
package com.supervision.pdfqaserver.constant;
import lombok.Getter;
/**
 * Generation type of a domain metadata record, identified by a string code.
 * Codes: 0 = manual entry, 1 = generated automatically by the system.
 */
@Getter
public enum DomainMetaGenerationEnum {

    /** Code "0": manual entry. */
    DOM_MANUAL_ENTRY("0", "手动录入"),

    /** Code "1": generated automatically by the system. */
    SYSTEM_AUTO_GENERATION("1", "系统自动");

    /** Code value of this constant. */
    private final String code;

    /** Display name of this constant. */
    private final String name;

    DomainMetaGenerationEnum(final String codeValue, final String displayName) {
        code = codeValue;
        name = displayName;
    }
}

@ -0,0 +1,26 @@
package com.supervision.pdfqaserver.constant;
import lombok.Getter;
/**
 * Layout type of a document section, identified by an integer code.
 */
@Getter
public enum LayoutTypeEnum {

    /** Code 0: text layout. */
    TEXT(0, "文本"),

    /** Code 1: table layout. */
    TABLE(1, "表格");

    /** Integer code of this layout type. */
    private final int code;

    /** Display name of this layout type. */
    private final String name;

    LayoutTypeEnum(final int codeValue, final String displayName) {
        code = codeValue;
        name = displayName;
    }
}

@ -1,7 +1,6 @@
package com.supervision.pdfqaserver.domain; package com.supervision.pdfqaserver.domain;
import com.baomidou.mybatisplus.annotation.*; import com.baomidou.mybatisplus.annotation.*;
import java.io.Serializable; import java.io.Serializable;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import lombok.Data; import lombok.Data;
@ -27,7 +26,7 @@ public class DocumentTruncation implements Serializable {
/** /**
* id pdf_analysis_outputid * id pdf_analysis_outputid
*/ */
private String sectionId; private Integer sectionId;
/** /**
* 0- 1- * 0- 1-

@ -14,8 +14,9 @@ public class DocumentDTO {
*/ */
private String id; private String id;
private Integer documentId; /**
* id
*/
private Integer sectionId; private Integer sectionId;
private Integer pageNo; private Integer pageNo;
@ -23,7 +24,7 @@ public class DocumentDTO {
/** /**
* 0 1 * 0 1
*/ */
private String layoutType; private Integer layoutType;
/** /**
* pdf * pdf
@ -48,11 +49,9 @@ public class DocumentDTO {
} }
public DocumentDTO(PdfAnalysisOutput pdfAnalysisOutput) { public DocumentDTO(PdfAnalysisOutput pdfAnalysisOutput) {
this.id = pdfAnalysisOutput.getPdfId().toString();
this.sectionId = pdfAnalysisOutput.getId(); this.sectionId = pdfAnalysisOutput.getId();
this.documentId = pdfAnalysisOutput.getPdfId(); this.layoutType = pdfAnalysisOutput.getLayoutType();
if (null != pdfAnalysisOutput.getLayoutType()) {
this.layoutType = pdfAnalysisOutput.getLayoutType().toString();
}
this.pageNo = pdfAnalysisOutput.getPageNo(); this.pageNo = pdfAnalysisOutput.getPageNo();
this.title = pdfAnalysisOutput.getTableTitle(); this.title = pdfAnalysisOutput.getTableTitle();
this.content = pdfAnalysisOutput.getContent(); this.content = pdfAnalysisOutput.getContent();

@ -1,5 +1,6 @@
package com.supervision.pdfqaserver.dto; package com.supervision.pdfqaserver.dto;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import lombok.Data; import lombok.Data;
/** /**
@ -43,4 +44,14 @@ public class ERAttributeDTO {
this.value = value; this.value = value;
this.dataType = dataType; this.dataType = dataType;
} }
/**
 * Copies this DTO into a new {@link TruncationErAttribute}.
 *
 * @return a new instance carrying this DTO's terId, type, attribute, value and dataType
 */
public TruncationErAttribute toTruncationErAttribute() {
    final TruncationErAttribute result = new TruncationErAttribute();
    result.setTerId(terId);
    result.setType(type);
    result.setAttribute(attribute);
    result.setValue(value);
    result.setDataType(dataType);
    return result;
}
} }

@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import lombok.Data; import lombok.Data;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@ -16,9 +17,9 @@ import java.util.*;
@Data @Data
public class EREDTO { public class EREDTO {
private List<EntityExtractionDTO> entities; private List<EntityExtractionDTO> entities = new ArrayList<>();
private List<RelationExtractionDTO> relations; private List<RelationExtractionDTO> relations = new ArrayList<>();
public EREDTO() { public EREDTO() {
} }
@ -121,4 +122,21 @@ public class EREDTO {
eredto.setEntities(entities); eredto.setEntities(entities);
return eredto; return eredto;
} }
/**
 * Fills in the English names of this object's entities and relations from a
 * Chinese/English word list.
 *
 * <p>An entity is matched by {@code getEntity()} and a relation by {@code getRelation()}
 * against {@code getChineseWord()}. When several words share the same Chinese text, the
 * first one in the list wins. Items without a match are left unchanged.
 *
 * @param wordsList dictionary entries to look up; a null or empty list is a no-op
 */
public void setEn(List<ChineseEnglishWords> wordsList) {
    if (CollUtil.isEmpty(wordsList)) {
        return;
    }
    // Index the dictionary once instead of rescanning it for every entity and relation.
    // putIfAbsent keeps the first entry per Chinese word, i.e. first-match semantics.
    Map<String, ChineseEnglishWords> dictionary = new HashMap<>();
    for (ChineseEnglishWords words : wordsList) {
        if (words != null) {
            dictionary.putIfAbsent(words.getChineseWord(), words);
        }
    }
    // The lists can be replaced with null through the generated setters, so guard both.
    if (entities != null) {
        for (EntityExtractionDTO entity : entities) {
            ChineseEnglishWords match = dictionary.get(entity.getEntity());
            if (match != null) {
                entity.setEntityEn(match.getEnglishWord());
            }
        }
    }
    if (relations != null) {
        for (RelationExtractionDTO relation : relations) {
            ChineseEnglishWords match = dictionary.get(relation.getRelation());
            if (match != null) {
                relation.setRelationEn(match.getEnglishWord());
            }
        }
    }
}
} }

@ -18,6 +18,11 @@ public class EntityExtractionDTO {
*/ */
private String entity; private String entity;
/**
*
*/
private String entityEn;
/** /**
* *
*/ */

@ -1,5 +1,7 @@
package com.supervision.pdfqaserver.dto; package com.supervision.pdfqaserver.dto;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
import lombok.Data; import lombok.Data;
import java.util.List; import java.util.List;
@ -23,11 +25,15 @@ public class RelationExtractionDTO {
*/ */
private String sourceType; private String sourceType;
private String sourceTypeEn;
/** /**
* *
*/ */
private String relation; private String relation;
private String relationEn;
/** /**
* *
*/ */
@ -38,6 +44,8 @@ public class RelationExtractionDTO {
*/ */
private String targetType; private String targetType;
private String targetTypeEn;
private List<ERAttributeDTO> attributes; private List<ERAttributeDTO> attributes;
public RelationExtractionDTO() { public RelationExtractionDTO() {
@ -52,4 +60,23 @@ public class RelationExtractionDTO {
this.sourceType = sourceType; this.sourceType = sourceType;
this.targetType = targetType; this.targetType = targetType;
} }
/**
 * Copies this DTO into a new {@link TruncationRelationExtraction}.
 *
 * @return a new instance carrying truncationId, source, sourceType, relation, target and targetType
 */
public TruncationRelationExtraction toTruncationRelationExtraction() {
    final TruncationRelationExtraction result = new TruncationRelationExtraction();
    result.setTruncationId(truncationId);
    result.setSource(source);
    result.setSourceType(sourceType);
    result.setRelation(relation);
    result.setTarget(target);
    result.setTargetType(targetType);
    return result;
}
/**
 * Builds a {@link DomainMetadata} from this relation's type-level information.
 *
 * @return a new instance carrying only sourceType, relation and targetType
 */
public DomainMetadata toDomainMetadata() {
    final DomainMetadata result = new DomainMetadata();
    result.setSourceType(sourceType);
    result.setRelation(relation);
    result.setTargetType(targetType);
    return result;
}
} }

@ -27,7 +27,7 @@ public class TruncateDTO {
/** /**
* id pdf_analysis_outputid * id pdf_analysis_outputid
*/ */
private String sectionId; private Integer sectionId;
/** /**
* *
@ -40,6 +40,17 @@ public class TruncateDTO {
private String content; private String content;
/** Creates an instance with no fields set. */
public TruncateDTO() {
}
public TruncateDTO(DocumentDTO documentDTO) {
this.documentId = Integer.parseInt(documentDTO.getId());
this.sectionId = documentDTO.getSectionId();
this.layoutType = documentDTO.getLayoutType().toString();
this.title = documentDTO.getTitle();
this.content = documentDTO.getContent();
}
public DocumentTruncation toDocumentTruncation() { public DocumentTruncation toDocumentTruncation() {
DocumentTruncation truncation = new DocumentTruncation(); DocumentTruncation truncation = new DocumentTruncation();
truncation.setDocumentId(this.documentId); truncation.setDocumentId(this.documentId);

@ -0,0 +1,6 @@
package com.supervision.pdfqaserver.service;
/**
 * Generates an English rendering of Chinese text.
 */
public interface ChinesEsToEnglishGenerator {
/**
 * Generates English text for the given Chinese text.
 *
 * @param chinese the Chinese text to translate
 * @return the generated text. NOTE(review): the caller in KnowledgeGraphServiceImpl treats
 *         an empty result as a failed generation — confirm whether null is an allowed return.
 */
String generate(String chinese);
}

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords; import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.baomidou.mybatisplus.extension.service.IService; import com.baomidou.mybatisplus.extension.service.IService;
import java.util.List;
/** /**
* @author Administrator * @author Administrator
@ -10,4 +11,11 @@ import com.baomidou.mybatisplus.extension.service.IService;
*/ */
public interface ChineseEnglishWordsService extends IService<ChineseEnglishWords> { public interface ChineseEnglishWordsService extends IService<ChineseEnglishWords> {
/**
 * Returns the stored Chinese/English word pairs; the implementation in this change
 * delegates to {@code list()} with no filter.
 */
List<ChineseEnglishWords> queryAll();
/**
 * Tells whether {@code word} is already covered by {@code wordsList}.
 * In the implementation in this change an empty {@code word} is reported as existing, and
 * otherwise the word is compared against each entry's {@code getChineseWord()}.
 *
 * @param word      the Chinese word to look for
 * @param wordsList the in-memory list to search
 * @return true if the word is empty or a matching entry is found
 */
boolean wordsExists(String word, List<ChineseEnglishWords> wordsList);
/**
 * Saves {@code words} unless a stored row with the same Chinese word already exists
 * (per the implementation in this change).
 *
 * @param words the word pair to persist
 */
void saveIfNotExists(ChineseEnglishWords words);
} }

@ -10,4 +10,6 @@ import com.baomidou.mybatisplus.extension.service.IService;
*/ */
public interface DomainMetadataService extends IService<DomainMetadata> { public interface DomainMetadataService extends IService<DomainMetadata> {
/**
 * Saves {@code metadata} unless a stored row with the same sourceType, targetType and
 * relation already exists (per the implementation in this change).
 *
 * @param metadata the metadata record to persist
 */
void saveIfNotExists(DomainMetadata metadata);
} }

@ -2,6 +2,9 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.TruncationEntityExtraction; import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
import com.baomidou.mybatisplus.extension.service.IService; import com.baomidou.mybatisplus.extension.service.IService;
import com.supervision.pdfqaserver.dto.EntityExtractionDTO;
import java.util.List;
/** /**
* @author Administrator * @author Administrator
@ -10,4 +13,5 @@ import com.baomidou.mybatisplus.extension.service.IService;
*/ */
public interface TruncationEntityExtractionService extends IService<TruncationEntityExtraction> { public interface TruncationEntityExtractionService extends IService<TruncationEntityExtraction> {
/**
 * Persists the extracted entities. The implementation in this change saves one row per
 * entity followed by one row per attribute of that entity, and does nothing for an
 * empty list.
 *
 * @param entities the extracted entities to store
 */
void saveERE(List<EntityExtractionDTO> entities);
} }

@ -2,6 +2,8 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.domain.TruncationRelationExtraction; import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
import com.baomidou.mybatisplus.extension.service.IService; import com.baomidou.mybatisplus.extension.service.IService;
import com.supervision.pdfqaserver.dto.RelationExtractionDTO;
import java.util.List;
/** /**
* @author Administrator * @author Administrator
@ -10,4 +12,5 @@ import com.baomidou.mybatisplus.extension.service.IService;
*/ */
public interface TruncationRelationExtractionService extends IService<TruncationRelationExtraction> { public interface TruncationRelationExtractionService extends IService<TruncationRelationExtraction> {
/**
 * Persists the extracted relations.
 * NOTE(review): presumably mirrors the entity variant (one row per relation plus its
 * attributes) — confirm against the implementation.
 *
 * @param relations the extracted relations to store
 */
void saveERE(List<RelationExtractionDTO> relations);
} }

@ -0,0 +1,24 @@
package com.supervision.pdfqaserver.service.impl;
import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service;
import static com.supervision.pdfqaserver.cache.PromptCache.CHINESE_TO_ENGLISH;
/**
 * {@link ChinesEsToEnglishGenerator} that asks an Ollama chat model for the translation.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ChinesEsToEnglishGeneratorImpl implements ChinesEsToEnglishGenerator {

    private final OllamaChatModel ollamaChatModel;

    /**
     * Sends a translation request for {@code chinese} to the chat model and returns its reply.
     *
     * @param chinese the Chinese text to translate
     * @return the model's reply as received
     */
    @Override
    public String generate(String chinese) {
        log.info("generate:开始翻译: {}",chinese);
        // NOTE(review): the CHINESE_TO_ENGLISH template in PromptCache is not used here;
        // the request text is built inline. Switch to the template once it has content.
        // Return the reply: the previous code discarded it and always returned null,
        // so every translation was treated as failed by the caller.
        return ollamaChatModel.call("请将以下中文翻译成英文: " + chinese);
    }
}

@ -1,10 +1,12 @@
package com.supervision.pdfqaserver.service.impl; package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords; import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.supervision.pdfqaserver.service.ChineseEnglishWordsService; import com.supervision.pdfqaserver.service.ChineseEnglishWordsService;
import com.supervision.pdfqaserver.mapper.ChineseEnglishWordsMapper; import com.supervision.pdfqaserver.mapper.ChineseEnglishWordsMapper;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List;
/** /**
* @author Administrator * @author Administrator
@ -15,6 +17,26 @@ import org.springframework.stereotype.Service;
public class ChineseEnglishWordsServiceImpl extends ServiceImpl<ChineseEnglishWordsMapper, ChineseEnglishWords> public class ChineseEnglishWordsServiceImpl extends ServiceImpl<ChineseEnglishWordsMapper, ChineseEnglishWords>
implements ChineseEnglishWordsService{ implements ChineseEnglishWordsService{
/**
 * Returns whatever the unfiltered {@code list()} call yields.
 */
@Override
public List<ChineseEnglishWords> queryAll() {
    final List<ChineseEnglishWords> everything = list();
    return everything;
}
/**
 * Tells whether {@code word} is already covered by {@code wordsList}.
 * An empty word is reported as existing.
 *
 * @param word      the Chinese word to look for
 * @param wordsList the list to search by {@code getChineseWord()}
 * @return true if the word is empty or some entry has the same Chinese word
 */
@Override
public boolean wordsExists(String word, List<ChineseEnglishWords> wordsList) {
    if (!StrUtil.isEmpty(word)) {
        for (ChineseEnglishWords candidate : wordsList) {
            if (StrUtil.equals(candidate.getChineseWord(), word)) {
                return true;
            }
        }
        return false;
    }
    return true;
}
/**
 * Saves {@code words} only when no stored row has the same Chinese word.
 *
 * @param words the word pair to persist
 */
@Override
public void saveIfNotExists(ChineseEnglishWords words) {
    final boolean alreadyStored = lambdaQuery()
            .eq(ChineseEnglishWords::getChineseWord, words.getChineseWord())
            .exists();
    if (alreadyStored) {
        return;
    }
    save(words);
}
} }

@ -15,6 +15,17 @@ import org.springframework.stereotype.Service;
public class DomainMetadataServiceImpl extends ServiceImpl<DomainMetadataMapper, DomainMetadata> public class DomainMetadataServiceImpl extends ServiceImpl<DomainMetadataMapper, DomainMetadata>
implements DomainMetadataService{ implements DomainMetadataService{
/**
 * Saves {@code metadata} only when no stored row has the same sourceType, targetType
 * and relation.
 *
 * @param metadata the metadata record to persist
 */
@Override
public void saveIfNotExists(DomainMetadata metadata) {
    final boolean alreadyStored = lambdaQuery()
            .eq(DomainMetadata::getSourceType, metadata.getSourceType())
            .eq(DomainMetadata::getTargetType, metadata.getTargetType())
            .eq(DomainMetadata::getRelation, metadata.getRelation())
            .exists();
    if (alreadyStored) {
        return;
    }
    save(metadata);
}
} }

@ -1,15 +1,16 @@
package com.supervision.pdfqaserver.service.impl; package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import com.supervision.pdfqaserver.dto.EREDTO; import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.constant.DomainMetaGenerationEnum;
import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
import com.supervision.pdfqaserver.domain.DomainMetadata;
import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.domain.PdfAnalysisOutput; import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import com.supervision.pdfqaserver.dto.DocumentDTO;
import com.supervision.pdfqaserver.dto.TruncateDTO;
import com.supervision.pdfqaserver.service.*; import com.supervision.pdfqaserver.service.*;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -30,13 +31,11 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
private final PdfAnalysisOutputService pdfAnalysisOutputService; private final PdfAnalysisOutputService pdfAnalysisOutputService;
private final PdfInfoService pdfInfoService;
private final TruncationEntityExtractionService truncationEntityExtractionService; private final TruncationEntityExtractionService truncationEntityExtractionService;
private final TruncationRelationExtractionService relationExtractionService; private final TruncationRelationExtractionService relationExtractionService;
private final TruncationErAttributeService truncationErAttributeService; private final ChinesEsToEnglishGenerator chinesEsToEnglishGenerator;
@Override @Override
public void generateGraph(String documentId) { public void generateGraph(String documentId) {
@ -55,6 +54,9 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
List<EREDTO> eredtoList = new ArrayList<>(); List<EREDTO> eredtoList = new ArrayList<>();
for (TruncateDTO truncateDTO : truncateDTOS) { for (TruncateDTO truncateDTO : truncateDTOS) {
EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO); EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO);
if (null == eredto){
continue;
}
// 保存实体关系抽取结果 // 保存实体关系抽取结果
this.saveERE(eredto, truncateDTO.getId()); this.saveERE(eredto, truncateDTO.getId());
} }
@ -62,13 +64,63 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
// 合并实体关系抽取结果 // 合并实体关系抽取结果
List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList); List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
// 保存领域元数据
for (EREDTO eredto : mergedList) { for (EREDTO eredto : mergedList) {
String insertCypher = tripleToCypherExecutor.generateInsertCypher(eredto); List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isEmpty(relations)){
continue;
}
for (RelationExtractionDTO relation : relations) {
DomainMetadata domainMetadata = relation.toDomainMetadata();
domainMetadata.setDomainType("1");
domainMetadata.setGenerationType(DomainMetaGenerationEnum.SYSTEM_AUTO_GENERATION.getCode());
domainMetadataService.saveIfNotExists(domainMetadata);
}
}
// 保存字典
List<ChineseEnglishWords> allWords = chineseEnglishWordsService.queryAll();
for (EREDTO eredto : mergedList) {
List<EntityExtractionDTO> entities = eredto.getEntities();
if (CollUtil.isNotEmpty(entities)){
for (EntityExtractionDTO entityDTO : entities) {
saveWordsIfNecessary(entityDTO.getEntity(), allWords);
}
}
List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isNotEmpty(relations)){
for (RelationExtractionDTO relationDTO : relations) {
saveWordsIfNecessary(relationDTO.getRelation(), allWords);
}
}
}
// 生成cypher语句
for (EREDTO eredto : mergedList) {
eredto.setEn(allWords);
String insertCypher = tripleToCypherExecutor.generateInsertCypher(eredto);
log.info("insertCypher:{}", insertCypher);
tripleToCypherExecutor.executeCypher(insertCypher); tripleToCypherExecutor.executeCypher(insertCypher);
} }
}
private void saveWordsIfNecessary(String word, List<ChineseEnglishWords> allWords) {
boolean exists = chineseEnglishWordsService.wordsExists(word, allWords);
if (exists){
return;
}
String generate = chinesEsToEnglishGenerator.generate(word);
if (StrUtil.isEmpty(generate)){
log.info("生成英文名称失败entity:{}", word);
return;
}
ChineseEnglishWords words = new ChineseEnglishWords();
words.setChineseWord(word);
words.setEnglishWord(generate);
chineseEnglishWordsService.saveIfNotExists(words);
allWords.add(words);// 更新缓存
} }
@Override @Override
@ -79,6 +131,11 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
@Override @Override
public void saveERE(EREDTO eredto, String truncationId) { public void saveERE(EREDTO eredto, String truncationId) {
// 保存实体信息
truncationEntityExtractionService.saveERE(eredto.getEntities());
// 保存关系
relationExtractionService.saveERE(eredto.getRelations());
} }
} }

@ -3,15 +3,19 @@ package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.cache.PromptCache; import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.constant.LayoutTypeEnum;
import com.supervision.pdfqaserver.dto.*; import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.service.TripleConversionPipeline; import com.supervision.pdfqaserver.service.TripleConversionPipeline;
import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreSentence;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.ollama.OllamaChatModel; import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.ArrayList; import java.util.*;
import java.util.List;
@Slf4j @Slf4j
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
@ -31,7 +35,38 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
return Integer.compare(o1.getPageNo(), o2.getPageNo()); return Integer.compare(o1.getPageNo(), o2.getPageNo());
} }
).toList(); ).toList();
return null;
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit");
// 创建管道
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
List<TruncateDTO> truncateDTOS = new ArrayList<>();
for (DocumentDTO documentDTO : documentDTOList) {
String content = documentDTO.getContent();
if (StrUtil.isEmpty(content)){
continue;
}
Integer layoutType = documentDTO.getLayoutType();
if (LayoutTypeEnum.TEXT.getCode() == layoutType){
// 如果是文本类型的布局,进行合并
CoreDocument document = new CoreDocument(content);
// 分析文本
pipeline.annotate(document);
// 获取句子
for (CoreSentence sentence : document.sentences()) {
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTO.setContent(sentence.text());
truncateDTOS.add(truncateDTO);
}
} else if (LayoutTypeEnum.TABLE.getCode() == layoutType) {
// 如果是表格类型的布局,直接添加到列表中
TruncateDTO truncateDTO = new TruncateDTO(documentDTO);
truncateDTOS.add(truncateDTO);
} else {
log.info("sliceDocuments:错误的布局类型: {}", layoutType);
}
}
return truncateDTOS;
} }
@Override @Override
@ -39,13 +74,11 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (StrUtil.equals(truncateDTO.getLayoutType(),"0")){ if (StrUtil.equals(truncateDTO.getLayoutType(),"0")){
EREDTO eredto = doTextEre(truncateDTO); return doTextEre(truncateDTO);
return eredto;
} }
if (StrUtil.equals(truncateDTO.getLayoutType(),"1")){ if (StrUtil.equals(truncateDTO.getLayoutType(),"1")){
EREDTO eredto = doTableEre(truncateDTO); return doTableEre(truncateDTO);
return eredto;
} }
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType()); log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
return null; return null;
@ -80,26 +113,119 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
if (CollUtil.isEmpty(eredtoList)){ if (CollUtil.isEmpty(eredtoList)){
return merged; return merged;
} }
Map<String, EntityExtractionDTO> entityMap = new HashMap<>();
Map<String, RelationExtractionDTO> relationMap = new HashMap<>();
for (EREDTO eredto : eredtoList) { for (EREDTO eredto : eredtoList) {
List<EntityExtractionDTO> entities = eredto.getEntities(); List<EntityExtractionDTO> entities = eredto.getEntities();
if (CollUtil.isNotEmpty(entities)){ if (CollUtil.isNotEmpty(entities)){
for (EntityExtractionDTO entity : entities) { for (EntityExtractionDTO entity : entities) {
String e = entity.getEntity(); String key = generateEntityMapKey(entity);
String name = entity.getName(); mergeAttribute(entityMap,entity, key);
// entity.getEntity() 和 entity.getName() 完全相等看作是同一个数据
} }
} }
List<RelationExtractionDTO> relations = eredto.getRelations(); List<RelationExtractionDTO> relations = eredto.getRelations();
if (CollUtil.isNotEmpty(relations)){ if (CollUtil.isNotEmpty(relations)){
for (RelationExtractionDTO relation : relations) { for (RelationExtractionDTO relation : relations) {
String source = relation.getSource();
String target = relation.getTarget();
String re = relation.getRelation();
// source和target,re完全相等看作是同一个数据 // source和target,re完全相等看作是同一个数据
String relationMapKey = generateRelationMapKey(relation);
mergeAttribute(relationMap,relation, relationMapKey);
}
}
}
// 利用合并后的map生成新的EREDTO
// 优先先把有关系的节点与关系组合在一次
Set<String> relationEntityKey = new HashSet<>();
for (Map.Entry<String, RelationExtractionDTO> relationEntry : relationMap.entrySet()) {
RelationExtractionDTO value = relationEntry.getValue();
EntityExtractionDTO sourceEntity = entityMap.get(StrUtil.join("_", value.getSourceType(), value.getSource()));
if (null == sourceEntity){
log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到头节点映射关系", value.getSourceType(), value.getSource());
continue;
}
EntityExtractionDTO targetEntity = entityMap.get(StrUtil.join("_", value.getTargetType(), value.getTarget()));
if (null == targetEntity){
log.warn("mergeEreResults:根据entity:{},name:{}未在entityMap中找到尾节点映射关系", value.getTargetType(), value.getTarget());
continue;
}
EREDTO eredto = new EREDTO();
eredto.setEntities(List.of(sourceEntity,targetEntity));
eredto.setRelations(List.of(value));
merged.add(eredto);
relationEntityKey.addAll(List.of(generateEntityMapKey(sourceEntity),generateEntityMapKey(targetEntity)));
}
// 将没有关系的节点单独放在一起
List<EntityExtractionDTO> leavedEntities = new ArrayList<>();
for (Map.Entry<String, EntityExtractionDTO> entry : entityMap.entrySet()) {
if (!relationEntityKey.contains(entry.getKey())){
leavedEntities.add(entry.getValue());
}
}
EREDTO eredto = new EREDTO();
eredto.setEntities(leavedEntities);
merged.add(eredto);
return merged;
}
private void mergeAttribute(Map<String, RelationExtractionDTO> entityMap,RelationExtractionDTO relation, String key) {
RelationExtractionDTO cachedEntity = entityMap.get(key);
if (null == cachedEntity){
entityMap.put(key, relation);
}else {
if (CollUtil.isEmpty(relation.getAttributes())){
return;
}
// 合并属性
List<ERAttributeDTO> attributes = relation.getAttributes();
if (null == attributes){
attributes = new ArrayList<>();
}
for (ERAttributeDTO attribute : relation.getAttributes()) {
String attributeKey = attribute.getAttribute();
String attributeValue = attribute.getValue();
if (StrUtil.isEmpty(attributeKey) || StrUtil.isEmpty(attributeValue)){
continue;
}
// 如果属性已经存在,则不添加
if (attributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
attributes.add(attribute);
}
} }
} }
} }
/**
 * Registers {@code entity} under {@code key}, or merges its attributes into the entity
 * already registered under that key.
 *
 * <p>On a merge, an incoming attribute is added only when it has a non-empty name and value
 * and the cached entity has no attribute with the same name yet.
 *
 * @param entityMap entities collected so far, by merge key
 * @param entity    the entity to register or merge
 * @param key       merge key of {@code entity}
 */
private void mergeAttribute(Map<String, EntityExtractionDTO> entityMap,EntityExtractionDTO entity, String key) {
    EntityExtractionDTO cached = entityMap.get(key);
    if (null == cached) {
        entityMap.put(key, entity);
        return;
    }
    List<ERAttributeDTO> incoming = entity.getAttributes();
    if (CollUtil.isEmpty(incoming)) {
        return;
    }
    // Merge into the CACHED entity. The previous code compared the incoming list with
    // itself, so nothing was ever merged. Working on a copy also avoids modifying a list
    // while it is being iterated, and tolerates immutable attribute lists.
    List<ERAttributeDTO> mergedAttributes = cached.getAttributes() == null
            ? new ArrayList<>()
            : new ArrayList<>(cached.getAttributes());
    for (ERAttributeDTO attribute : incoming) {
        String attributeKey = attribute.getAttribute();
        String attributeValue = attribute.getValue();
        if (StrUtil.isEmpty(attributeKey) || StrUtil.isEmpty(attributeValue)) {
            continue;
        }
        // Skip attributes the cached entity already has.
        if (mergedAttributes.stream().noneMatch(a -> StrUtil.equals(a.getAttribute(), attributeKey))) {
            mergedAttributes.add(attribute);
        }
    }
    cached.setAttributes(mergedAttributes);
}
/**
 * Builds the merge key of an entity: its entity value and its name joined by an underscore.
 *
 * @param entityExtractionDTO the entity to build the key for
 * @return the key in the form {@code entity_name}
 */
private String generateEntityMapKey(EntityExtractionDTO entityExtractionDTO) {
    // String.join renders null elements as "null", exactly like string concatenation.
    return String.join("_", entityExtractionDTO.getEntity(), entityExtractionDTO.getName());
}
private String generateRelationMapKey(RelationExtractionDTO relationExtractionDTO) {
return relationExtractionDTO.getSource() + "_" + relationExtractionDTO.getTarget() + "_" + relationExtractionDTO.getRelation();
} }
} }

@ -1,11 +1,13 @@
package com.supervision.pdfqaserver.service.impl; package com.supervision.pdfqaserver.service.impl;
import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.dto.EREDTO; import com.supervision.pdfqaserver.dto.EREDTO;
import com.supervision.pdfqaserver.service.TripleToCypherExecutor; import com.supervision.pdfqaserver.service.TripleToCypherExecutor;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.ollama.OllamaChatModel; import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import static com.supervision.pdfqaserver.cache.PromptCache.ERE_TO_INSERT_CYPHER;
@Slf4j @Slf4j
@Service @Service
@ -15,7 +17,10 @@ public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
private final OllamaChatModel ollamaChatModel; private final OllamaChatModel ollamaChatModel;
@Override @Override
public String generateInsertCypher(EREDTO eredto) { public String generateInsertCypher(EREDTO eredto) {
return null;
String prompt = PromptCache.promptMap.get(ERE_TO_INSERT_CYPHER);
String call = ollamaChatModel.call(prompt);
return call;
} }
@Override @Override

@ -1,20 +1,54 @@
package com.supervision.pdfqaserver.service.impl; package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.TruncationEntityExtraction; import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.supervision.pdfqaserver.dto.ERAttributeDTO;
import com.supervision.pdfqaserver.dto.EntityExtractionDTO;
import com.supervision.pdfqaserver.service.TruncationEntityExtractionService; import com.supervision.pdfqaserver.service.TruncationEntityExtractionService;
import com.supervision.pdfqaserver.mapper.TruncationEntityExtractionMapper; import com.supervision.pdfqaserver.mapper.TruncationEntityExtractionMapper;
import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List;
/** /**
* @author Administrator * @author Administrator
* @description truncation_entity_extraction()Service * @description truncation_entity_extraction()Service
* @createDate 2025-04-27 11:45:24 * @createDate 2025-04-27 11:45:24
*/ */
@Slf4j
@Service @Service
@RequiredArgsConstructor
public class TruncationEntityExtractionServiceImpl extends ServiceImpl<TruncationEntityExtractionMapper, TruncationEntityExtraction> public class TruncationEntityExtractionServiceImpl extends ServiceImpl<TruncationEntityExtractionMapper, TruncationEntityExtraction>
implements TruncationEntityExtractionService{ implements TruncationEntityExtractionService{
private final TruncationErAttributeService truncationErAttributeService;
/**
 * Persists every extracted entity together with its attributes.
 *
 * <p>Does nothing when {@code entities} is null or empty.
 *
 * @param entities the extracted entities to store
 */
@Override
public void saveERE(List<EntityExtractionDTO> entities) {
    if (!CollUtil.isEmpty(entities)) {
        entities.forEach(this::saveEntityWithAttributes);
    }
}

/**
 * Stores one entity row, then one attribute row per attribute of that entity.
 */
private void saveEntityWithAttributes(EntityExtractionDTO dto) {
    TruncationEntityExtraction row = new TruncationEntityExtraction();
    row.setTruncationId(dto.getTruncationId());
    row.setEntity(dto.getEntity());
    row.setName(dto.getName());
    this.save(row);

    List<ERAttributeDTO> attrs = dto.getAttributes();
    if (CollUtil.isEmpty(attrs)) {
        return;
    }
    for (ERAttributeDTO attr : attrs) {
        // The DTO is given the id of the entity row saved above before it is
        // converted into the attribute record.
        attr.setTerId(row.getId());
        truncationErAttributeService.save(attr.toTruncationErAttribute());
    }
}
} }

@ -1,20 +1,51 @@
package com.supervision.pdfqaserver.service.impl; package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.supervision.pdfqaserver.domain.TruncationErAttribute;
import com.supervision.pdfqaserver.domain.TruncationRelationExtraction; import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
import com.supervision.pdfqaserver.dto.ERAttributeDTO;
import com.supervision.pdfqaserver.dto.RelationExtractionDTO;
import com.supervision.pdfqaserver.service.TruncationErAttributeService;
import com.supervision.pdfqaserver.service.TruncationRelationExtractionService; import com.supervision.pdfqaserver.service.TruncationRelationExtractionService;
import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper; import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List;
/** /**
* @author Administrator * @author Administrator
* @description truncation_relation_extraction()Service * @description truncation_relation_extraction()Service
* @createDate 2025-04-27 11:45:24 * @createDate 2025-04-27 11:45:24
*/ */
@Slf4j
@Service @Service
@RequiredArgsConstructor
public class TruncationRelationExtractionServiceImpl extends ServiceImpl<TruncationRelationExtractionMapper, TruncationRelationExtraction> public class TruncationRelationExtractionServiceImpl extends ServiceImpl<TruncationRelationExtractionMapper, TruncationRelationExtraction>
implements TruncationRelationExtractionService{ implements TruncationRelationExtractionService{
private final TruncationErAttributeService truncationErAttributeService;
/**
 * Persists every extracted relation together with its attributes.
 *
 * <p>Does nothing when {@code relations} is null or empty.
 *
 * @param relations the extracted relations to store
 */
@Override
public void saveERE(List<RelationExtractionDTO> relations) {
    if (CollUtil.isEmpty(relations)) {
        return;
    }
    for (RelationExtractionDTO dto : relations) {
        TruncationRelationExtraction row = dto.toTruncationRelationExtraction();
        this.save(row);

        if (!CollUtil.isEmpty(dto.getAttributes())) {
            dto.getAttributes().forEach(attr -> {
                TruncationErAttribute attrRow = attr.toTruncationErAttribute();
                // Each attribute record gets the id of the relation row saved above.
                attrRow.setTerId(row.getId());
                truncationErAttributeService.save(attrRow);
            });
        }
    }
}
} }

@ -7,7 +7,7 @@
<resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.DocumentTruncation"> <resultMap id="BaseResultMap" type="com.supervision.pdfqaserver.domain.DocumentTruncation">
<id property="id" column="id" jdbcType="VARCHAR"/> <id property="id" column="id" jdbcType="VARCHAR"/>
<result property="documentId" column="document_id" jdbcType="INTEGER"/> <result property="documentId" column="document_id" jdbcType="INTEGER"/>
<result property="sectionId" column="section_id" jdbcType="VARCHAR"/> <result property="sectionId" column="section_id" jdbcType="INTEGER"/>
<result property="layoutType" column="layout_type" jdbcType="VARCHAR"/> <result property="layoutType" column="layout_type" jdbcType="VARCHAR"/>
<result property="title" column="title" jdbcType="VARCHAR"/> <result property="title" column="title" jdbcType="VARCHAR"/>
<result property="content" column="content" jdbcType="VARCHAR"/> <result property="content" column="content" jdbcType="VARCHAR"/>

Loading…
Cancel
Save