You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
106 lines
3.8 KiB
Java
106 lines
3.8 KiB
Java
2 months ago
|
package com.supervision.pdfqaserver.service.impl;
|
||
|
|
||
|
import cn.hutool.core.collection.CollUtil;
|
||
|
import cn.hutool.core.util.StrUtil;
|
||
|
import com.supervision.pdfqaserver.cache.PromptCache;
|
||
|
import com.supervision.pdfqaserver.dto.*;
|
||
|
import com.supervision.pdfqaserver.service.TripleConversionPipeline;
|
||
|
import lombok.RequiredArgsConstructor;
|
||
|
import lombok.extern.slf4j.Slf4j;
|
||
|
import org.springframework.ai.ollama.OllamaChatModel;
|
||
|
import org.springframework.stereotype.Service;
|
||
|
|
||
|
import java.util.ArrayList;
|
||
|
import java.util.List;
|
||
|
@Slf4j
|
||
|
@Service
|
||
|
@RequiredArgsConstructor
|
||
|
public class TripleConversionPipelineImpl implements TripleConversionPipeline {
|
||
|
|
||
|
private final OllamaChatModel ollamaChatModel;
|
||
|
|
||
|
@Override
|
||
|
public List<TruncateDTO> sliceDocuments(List<DocumentDTO> documents) {
|
||
|
// 对pdfAnalysisOutputs进行排序
|
||
|
List<DocumentDTO> documentDTOList = documents.stream().sorted(
|
||
|
// 先对pageNo进行排序再对layoutOrder进行排序
|
||
|
(o1, o2) -> {
|
||
|
if (o1.getPageNo().equals(o2.getPageNo())) {
|
||
|
return Integer.compare(o1.getLayoutOrder(), o2.getLayoutOrder());
|
||
|
}
|
||
|
return Integer.compare(o1.getPageNo(), o2.getPageNo());
|
||
|
}
|
||
|
).toList();
|
||
|
return null;
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public EREDTO doEre(TruncateDTO truncateDTO) {
|
||
|
|
||
|
if (StrUtil.equals(truncateDTO.getLayoutType(),"0")){
|
||
|
|
||
|
EREDTO eredto = doTextEre(truncateDTO);
|
||
|
return eredto;
|
||
|
}
|
||
|
|
||
|
if (StrUtil.equals(truncateDTO.getLayoutType(),"1")){
|
||
|
EREDTO eredto = doTableEre(truncateDTO);
|
||
|
return eredto;
|
||
|
}
|
||
|
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
|
||
|
return null;
|
||
|
}
|
||
|
|
||
|
private EREDTO doTextEre(TruncateDTO truncateDTO) {
|
||
|
String prompt = PromptCache.promptMap.get(PromptCache.DOERE_TEXT);
|
||
|
String formatted = String.format(prompt, truncateDTO.getContent());
|
||
|
String response = ollamaChatModel.call(formatted);
|
||
|
// todo:暂时不去处理异常返回
|
||
|
|
||
|
return EREDTO.fromTextJson(response, truncateDTO.getId());
|
||
|
}
|
||
|
|
||
|
private EREDTO doTableEre(TruncateDTO truncateDTO) {
|
||
|
String prompt = PromptCache.promptMap.get(PromptCache.DOERE_TABLE);
|
||
|
String formatted = String.format(prompt, truncateDTO.getContent());
|
||
|
String response = ollamaChatModel.call(formatted);
|
||
|
// todo:暂时不去处理异常返回
|
||
|
|
||
|
return EREDTO.fromTableJson(response, truncateDTO.getId());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* 合并实体关系抽取结果 主要是对实体和关系中的属性进行合并
|
||
|
* @param eredtoList 实体关系抽取结果列表
|
||
|
* @return
|
||
|
*/
|
||
|
@Override
|
||
|
public List<EREDTO> mergeEreResults(List<EREDTO> eredtoList) {
|
||
|
List<EREDTO> merged = new ArrayList<>();
|
||
|
if (CollUtil.isEmpty(eredtoList)){
|
||
|
return merged;
|
||
|
}
|
||
|
for (EREDTO eredto : eredtoList) {
|
||
|
List<EntityExtractionDTO> entities = eredto.getEntities();
|
||
|
if (CollUtil.isNotEmpty(entities)){
|
||
|
for (EntityExtractionDTO entity : entities) {
|
||
|
String e = entity.getEntity();
|
||
|
String name = entity.getName();
|
||
|
// entity.getEntity() 和 entity.getName() 完全相等看作是同一个数据
|
||
|
}
|
||
|
}
|
||
|
List<RelationExtractionDTO> relations = eredto.getRelations();
|
||
|
if (CollUtil.isNotEmpty(relations)){
|
||
|
for (RelationExtractionDTO relation : relations) {
|
||
|
String source = relation.getSource();
|
||
|
String target = relation.getTarget();
|
||
|
String re = relation.getRelation();
|
||
|
// source和target,re完全相等看作是同一个数据
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return null;
|
||
|
}
|
||
|
}
|