You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

106 lines
3.8 KiB
Java

2 months ago
package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.dto.*;
import com.supervision.pdfqaserver.service.TripleConversionPipeline;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
/**
 * {@link TripleConversionPipeline} implementation that sends document segments to an
 * Ollama chat model and parses the replies into entity/relation extraction results.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class TripleConversionPipelineImpl implements TripleConversionPipeline {

    /** Layout type value that {@link #doEre(TruncateDTO)} routes to text extraction. */
    private static final String LAYOUT_TYPE_TEXT = "0";

    /** Layout type value that {@link #doEre(TruncateDTO)} routes to table extraction. */
    private static final String LAYOUT_TYPE_TABLE = "1";

    private final OllamaChatModel ollamaChatModel;

    /** Identity of an entity for de-duplication: same entity value and same name. */
    private record EntityKey(String entity, String name) {
    }

    /** Identity of a relation for de-duplication: same source, target and relation. */
    private record RelationKey(String source, String target, String relation) {
    }

    /**
     * Orders the documents by page number and then by layout order.
     *
     * <p>NOTE(review): the slicing step itself is not implemented yet. The ordered list is
     * computed but not used, and this method currently always returns {@code null}.
     *
     * @param documents documents to slice; must not be {@code null}
     * @return currently always {@code null}
     */
    @Override
    public List<TruncateDTO> sliceDocuments(List<DocumentDTO> documents) {
        // Sort by pageNo first, then by layoutOrder within the same page.
        List<DocumentDTO> documentDTOList = documents.stream()
                .sorted(Comparator.comparingInt(DocumentDTO::getPageNo)
                        .thenComparingInt(DocumentDTO::getLayoutOrder))
                .toList();
        // TODO: build TruncateDTO slices from documentDTOList.
        return null;
    }

    /**
     * Runs entity/relation extraction on one segment, choosing the prompt by layout type.
     *
     * @param truncateDTO segment to process; must not be {@code null}
     * @return the parsed extraction result, or {@code null} when the layout type is neither
     *         {@code "0"} (text) nor {@code "1"} (table)
     */
    @Override
    public EREDTO doEre(TruncateDTO truncateDTO) {
        if (StrUtil.equals(truncateDTO.getLayoutType(), LAYOUT_TYPE_TEXT)) {
            return doTextEre(truncateDTO);
        }
        if (StrUtil.equals(truncateDTO.getLayoutType(), LAYOUT_TYPE_TABLE)) {
            return doTableEre(truncateDTO);
        }
        // An unrecognised layout type is an error condition, so log it above info level.
        log.warn("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
        return null;
    }

    /**
     * Extracts entities and relations from a text segment using the
     * {@code PromptCache.DOERE_TEXT} prompt.
     */
    private EREDTO doTextEre(TruncateDTO truncateDTO) {
        String response = askModel(PromptCache.promptMap.get(PromptCache.DOERE_TEXT), truncateDTO);
        // TODO: abnormal model responses are not handled yet.
        return EREDTO.fromTextJson(response, truncateDTO.getId());
    }

    /**
     * Extracts entities and relations from a table segment using the
     * {@code PromptCache.DOERE_TABLE} prompt.
     */
    private EREDTO doTableEre(TruncateDTO truncateDTO) {
        String response = askModel(PromptCache.promptMap.get(PromptCache.DOERE_TABLE), truncateDTO);
        // TODO: abnormal model responses are not handled yet.
        return EREDTO.fromTableJson(response, truncateDTO.getId());
    }

    /**
     * Fills the prompt template with the segment content and sends it to the chat model.
     *
     * @param promptTemplate {@link String#format} template taking the segment content
     * @param truncateDTO    segment whose content is inserted into the template
     * @return the raw model response
     * @throws NullPointerException if {@code promptTemplate} is {@code null}
     */
    private String askModel(String promptTemplate, TruncateDTO truncateDTO) {
        // Fail with an explicit message instead of an anonymous NPE inside String.format.
        Objects.requireNonNull(promptTemplate, "prompt template is missing from PromptCache.promptMap");
        String formatted = String.format(promptTemplate, truncateDTO.getContent());
        return ollamaChatModel.call(formatted);
    }

    /**
     * Removes duplicate entities and relations across all extraction results.
     *
     * <p>Two entities are the same when both {@code getEntity()} and {@code getName()} are
     * equal. Two relations are the same when {@code getSource()}, {@code getTarget()} and
     * {@code getRelation()} are all equal. The first occurrence, in list order, is kept and
     * later duplicates are removed from the entity/relation lists of the passed-in results.
     *
     * <p>NOTE(review): duplicates are removed in place, which assumes
     * {@code getEntities()} and {@code getRelations()} return mutable lists — confirm
     * against {@code EREDTO}.
     *
     * @param eredtoList extraction results to merge; may be {@code null} or empty, and
     *                   {@code null} elements are skipped
     * @return a new list holding the non-null input results in their original order,
     *         or an empty list when the input is {@code null} or empty
     */
    @Override
    public List<EREDTO> mergeEreResults(List<EREDTO> eredtoList) {
        List<EREDTO> merged = new ArrayList<>();
        if (CollUtil.isEmpty(eredtoList)) {
            return merged;
        }
        Set<EntityKey> seenEntities = new HashSet<>();
        Set<RelationKey> seenRelations = new HashSet<>();
        for (EREDTO eredto : eredtoList) {
            if (eredto == null) {
                // doEre returns null for unsupported layout types; ignore such entries.
                continue;
            }
            List<EntityExtractionDTO> entities = eredto.getEntities();
            if (CollUtil.isNotEmpty(entities)) {
                // Set.add returns false for a key that was already recorded.
                entities.removeIf(entity ->
                        !seenEntities.add(new EntityKey(entity.getEntity(), entity.getName())));
            }
            List<RelationExtractionDTO> relations = eredto.getRelations();
            if (CollUtil.isNotEmpty(relations)) {
                relations.removeIf(relation -> !seenRelations.add(
                        new RelationKey(relation.getSource(), relation.getTarget(), relation.getRelation())));
            }
            merged.add(eredto);
        }
        return merged;
    }
}