|
|
|
@ -1,6 +1,8 @@
|
|
|
|
|
package com.supervision.pdfqaserver.service.impl;
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.collection.CollUtil;
|
|
|
|
|
import cn.hutool.core.lang.Assert;
|
|
|
|
|
import cn.hutool.core.util.BooleanUtil;
|
|
|
|
|
import cn.hutool.core.util.StrUtil;
|
|
|
|
|
import com.supervision.pdfqaserver.cache.PromptCache;
|
|
|
|
|
import com.supervision.pdfqaserver.constant.LayoutTypeEnum;
|
|
|
|
@ -74,22 +76,48 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
|
|
|
|
|
public EREDTO doEre(TruncateDTO truncateDTO) {
|
|
|
|
|
|
|
|
|
|
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TEXT.getCode()))){
|
|
|
|
|
return doTextEre(truncateDTO);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){
|
|
|
|
|
// 先分析表格是否是描述类型
|
|
|
|
|
Boolean classify = this.classify(truncateDTO.getContent());
|
|
|
|
|
if (null == classify){
|
|
|
|
|
log.info("doEre:表格分类结果为空,切分文档id:{}", truncateDTO.getId());
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
if (classify){
|
|
|
|
|
return doTextEre(truncateDTO);
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
log.error("doEre:文本实体关系抽取失败,内容:{}", truncateDTO.getContent(), e);
|
|
|
|
|
}
|
|
|
|
|
return doTableEre(truncateDTO);
|
|
|
|
|
}
|
|
|
|
|
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){
|
|
|
|
|
try {
|
|
|
|
|
return doTableEre(truncateDTO);
|
|
|
|
|
} catch (Exception e) {
|
|
|
|
|
log.error("doEre:表格实体关系抽取失败,内容:{}", truncateDTO.getContent(), e);
|
|
|
|
|
@Override
|
|
|
|
|
public Boolean classify(String content) {
|
|
|
|
|
Assert.notEmpty(content, "内容不能为空");
|
|
|
|
|
// 对表格内容进行精简,只获取与前四行相关的内容
|
|
|
|
|
String[] lines = content.split("\n");
|
|
|
|
|
if (lines.length > 5){
|
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
|
for (int i = 0; i < 5; i++) {
|
|
|
|
|
sb.append(lines[i]).append("\n");
|
|
|
|
|
}
|
|
|
|
|
content = sb.toString();
|
|
|
|
|
}
|
|
|
|
|
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
|
|
|
|
|
log.info("classify:开始进行实体关系分类,内容:{}", content);
|
|
|
|
|
String prompt = PromptCache.promptMap.get(PromptCache.CLASSIFY_TABLE);
|
|
|
|
|
|
|
|
|
|
String format = StrUtil.format(prompt, content);
|
|
|
|
|
String response = ollamaChatModel.call(format);
|
|
|
|
|
log.info("classify响应结果:{}", response);
|
|
|
|
|
return BooleanUtil.toBooleanObject(response);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public TableTitleDTO extractTableTitle(String content) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|