diff --git a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java index a18808b..0c8553b 100644 --- a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java +++ b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java @@ -21,6 +21,38 @@ public class PromptCache { public static final String EXTRACT_TABLE_TITLE = "EXTRACT_TABLE_TITLE"; + /** + * 分类PDF内容类型 + */ + public static final String CLASSIFY_CONTENT_TYPE = "CLASSIFY_CONTENT_TYPE"; + + + /** + * 识别行业类型 + */ + public static final String CLASSIFY_INDUSTRY = "CLASSIFY_INDUSTRY"; + + + /** + * 识别意图 + */ + public static final String CLASSIFY_INTENT = "CLASSIFY_INTENT"; + + /** + * 识别意图(训练使用) + */ + public static final String CLASSIFY_INTENT_TRAIN = "CLASSIFY_INTENT_TRAIN"; + + /** + * 识别意图元数据 + */ + public static final String EXTRACT_INTENT_METADATA = "EXTRACT_INTENT_METADATA"; + + /** + * 识别三元组 + */ + public static final String EXTRACT_ERE_BASE_INTENT = "EXTRACT_ERE_BASE_INTENT"; + public static final Map promptMap = new HashMap<>(); static { @@ -35,6 +67,12 @@ public class PromptCache { promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT); promptMap.put(CLASSIFY_TABLE, CLASSIFY_TABLE_PROMPT); promptMap.put(EXTRACT_TABLE_TITLE, EXTRACT_TABLE_TITLE_PROMPT); + promptMap.put(CLASSIFY_CONTENT_TYPE, CLASSIFY_CONTENT_TYPE_PROMPT); + promptMap.put(CLASSIFY_INDUSTRY, CLASSIFY_INDUSTRY_PROMPT); + promptMap.put(CLASSIFY_INTENT, CLASSIFY_INTENT_PROMPT); + promptMap.put(CLASSIFY_INTENT_TRAIN, CLASSIFY_INTENT_TRAIN_PROMPT); + promptMap.put(EXTRACT_INTENT_METADATA, EXTRACT_INTENT_METADATA_PROMPT); + promptMap.put(EXTRACT_ERE_BASE_INTENT, EXTRACT_ERE_BASE_INTENT_PROMPT); } @@ -381,4 +419,340 @@ public class PromptCache { **需要处理的文本** {} """; + + + private static final String CLASSIFY_CONTENT_TYPE_PROMPT = """ + # PDF文档类型分类器提示词 + + ## 功能说明 + 根据指定的固定分类类型(`ContentType`),验证输入的PDF文本是否符合该类型特征,并返回JSON格式结果。 + + ## 分类类型 + {ContentType} + + + ## 输入参数 + + PDF文本内容: + {text} + + + ## 处理规则 + 1. **入参 `ContentType` 决定验证目标类型** + - 根据 `ContentType` 的值,严格匹配对应类型的特征: + - `0`:验证是否符合研报类型(专业术语、财务数据) + - `1`:验证是否符合对话类型(多轮对话标记) + - `2`:验证是否符合记录类型(时间戳、条目化描述) + + 2. **验证逻辑** + - 若文本特征与 `ContentType` 指定类型匹配 → 返回 `{"ContentType": 指定值}` + - 若文本特征不匹配 → 返回 `{}`(表示类型不符) + + 3. **类型定义** + ```json + { + "0": "研报类型(行业分析、财务数据)", + "1": "对话类型(会议记录、问答交流)", + "2": "记录类型(操作日志、事务记录)" + } + ``` + + + ## 验证示例 + ```json + // 示例1:指定类型0,文本符合研报特征 + 输入: + { + "text": "2023年新能源汽车渗透率达35%(乘联会),预计2024年突破50%" + } + 输出: + {"ContentType": 0} + + // 示例2:指定类型1,文本不符合对话特征 + 输入: + { + "text": "系统启动执行数据同步" + } + 输出: + {} + + // 示例3:指定类型2,文本符合记录特征 + 输入: + { + "text": "2023-10-01 14:00 用户登录异常;14:05 触发安全警报" + } + 输出: + {"ContentType": 2} + ``` + + --- + + **设计说明** + - 入参 `ContentType` 为固定值,用于声明待验证的目标类型,而非自动分类。 + - 输出结果仅表示文本是否符合声明的类型,实现“类型断言”功能。 + - 参数命名与原文档保持一致,但调整了逻辑语义以符合用户需求。 + + + ## 输出要求 + 1. 严格遵循JSON格式 + 2. 不需要解释,不需要说明。 + 仅返回以下两种结果之一: + - 匹配成功:`{"ContentType": 0/1/2}` + - 匹配失败:`{}` + + ./no_think + + """; + + private static final String CLASSIFY_INDUSTRY_PROMPT = """ + ### 行业类型识别 + + 你是一个专业的行业分类专家。你的任务是根据给定的文本内容,判断这段文本最可能属于下面行业列表中哪个行业。 + 请结合文本内容中的专业术语、关键领域、上下文信息,准确判断其所属行业,并返回对应的行业名称。 + + ### 输入: + + ``` + {text} + ``` + + + ### 行业列表 + {industryCategory} + + ### 输出要求: + + * 请只输出**一个最可能的行业类型**,不要输出概率或多个行业; + * 只返回**行业名称**,不需要解释、分析、备注等; + * 行业名称请选择行业列表中的行业。 + * 输出纯JSON格式,不要使用```json ```等任何Markdown标记包装 + + + ### 示例输出: + + ``` + { + industryCategory:软件与信息技术 + } + ``` + """; + + private static final String CLASSIFY_INTENT_PROMPT = """ + # 从文本中识别预定义意图类型 + + ## 功能说明 + 根据提供的准确意图列表,识别文本段落中匹配的意图类型。 + + ## 可用意图列表 + {IntentType} + + ## 处理规则 + 1. 严格匹配文本内容与意图类型的关联性 + 2. 文本可能匹配多个意图类型 + 3. 若无匹配则返回空对象 + + ## 待处理文本 + {text} + + ## 验证示例 + ```json + // 示例1:匹配单个意图 + 输入: + { + "text": "本公司注册地址为上海市浦东新区张江高科技园区" + } + 输出: + { + "IntentTypeList": ["公司地址"] + } + + // 示例2:匹配多个意图 + 输入: + { + "text": "2023年度财务报告显示公司总部位于北京,全年营收..." + } + 输出: + { + "IntentTypeList": ["公司地址", "公司年度报告"] + } + + // 示例3:无匹配意图 + 输入: + { + "text": "今天的天气很适合户外活动" + } + 输出: + {} + """; + + private static final String CLASSIFY_INTENT_TRAIN_PROMPT = """ + # 提取出文本片段的意图 + + ## 功能说明 + 识别PDF文本内容中某一段落的意图类型 + + ## 待处理文本 + {text} + + ## 验证示例 + + ```json + // 示例1: + 输入: + { + "text": "..." + } + 输出: + { + "IntentTypeList": ["...", "..."] + } + + // 示例2:文本意图无法识别 + 输入: + { + "text": "人生短短几个球" + } + 输出: + {} + ``` + + ## 输出要求 + + 1. 严格遵循 JSON 格式。 + 2. 输出纯JSON格式,不要使用```json ```等任何Markdown标记包装 + 3. 不需要解释,不需要说明,仅返回以下两种结果: + + 匹配成功: + ```json + {"IntentTypeList": ["...", "..."]} + ``` + - 匹配失败: + ```json + {} + ``` + + 3.每个意图标签必须独立表述,禁止使用“...和...”等连接词合并两个意图。 + ./no_think + """; + + private static final String EXTRACT_INTENT_METADATA_PROMPT = """ + # 元数据提取指令 + + ## 任务描述 + 你是一个专业的元数据提取引擎,需要从给定的文本片段中识别出符合指定意图的实体、关系及其属性,并按照标准JSON格式输出。 + + ## 输入数据 + - 文本片段: + {text} + + - 可选意图标签: + {IntentTypeList} + + + ## 输出要求 + 1. 分析文本内容,识别与意图标签相关的实体和关系 + 2. 每个结果应包含: + - source(来源实体) + - relation(关系) + - target(目标实体) + - intent(匹配的意图标签) + 3. 每个实体/关系应包含: + - type(类型) + - attributes(相关属性列表) + 4. 使用以下示例格式: + + ```json + [ + { + "source": { + "type": "实体类型1", + "attributes": ["属性1", "属性2"] + }, + "relation": { + "type": "关系类型", + "attributes": [] + }, + "target": { + "type": "实体类型2", + "attributes": ["属性3"] + }, + "intent": "匹配的意图标签" + } + ] + + 5. 属性只代表属性名称:例如“名称“,”数量“ + """; + + private static final String EXTRACT_ERE_BASE_INTENT_PROMPT = """ + # 提示词 + + ## 任务描述: + 你是一个信息抽取引擎,需要从给定的文本中提取符合指定三元组标签(实体、关系、属性)的结构化数据。 + + ## 输入数据: + - 待处理文本:{text} + - 三元组标签及属性名称: + {domainMetadata} + + + ## 示例: + { + "nodes": [ + { + "type": "公司", + "attributes": { + "名称": "龙源(酒泉)风力发电有限公司", + "地址": "雨花台区" + } + }, + { + "type": "电子银行承兑汇票", + "attributes": { + "金额": "100.00万元", + "打印时间": "2024年10月20号" + } + }, + { + "type": "公司", + "attributes": { + "名称": "杭州六小龙", + "地址": "杭州高新区" + } + } + ], + "relations": [ + { + "type": "持有", + "attributes": { + } + }, + { + "type": "收购", + "attributes": { + "收购类型": "全资收购" + "收购时间":"2025年5月28号" + } + } + ], + "typed_triplets": [ + [ + "公司", + "持有", + "电子银行承兑汇票" + ], + [ + "公司", + "收购", + "公司" + ] + ] + } + + + ## 注意事项: + - 仅提取 `domainMetadata` 中定义的标签和属性。 + - 若属性无对应值,可留空或忽略。 + - 确保提取的值与原文一致,不进行推断或改写。 + - 输出纯JSON格式,不要使用```json ```等任何Markdown标记包装 + """; } diff --git a/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java b/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java index 3bc5d6f..9296776 100644 --- a/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java +++ b/src/main/java/com/supervision/pdfqaserver/constant/DocumentContentTypeEnum.java @@ -32,4 +32,21 @@ public enum DocumentContentTypeEnum { private final String type; private final String desc; + + public static String formatToString() { + StringBuilder sb = new StringBuilder(); + for (DocumentContentTypeEnum value : values()) { + sb.append(value.getType()).append(":").append(value.getDesc()).append(" "); + } + return sb.toString(); + } + + public static DocumentContentTypeEnum getByType(String type) { + for (DocumentContentTypeEnum value : values()) { + if (value.getType().equals(type)) { + return value; + } + } + return null; + } } diff --git a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java index 20ea41d..b332993 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java @@ -1,6 +1,8 @@ package com.supervision.pdfqaserver.dto; +import cn.hutool.core.util.StrUtil; import com.supervision.pdfqaserver.domain.DomainMetadata; +import com.supervision.pdfqaserver.domain.ErAttribute; import lombok.Data; import java.util.ArrayList; @@ -51,6 +53,10 @@ public class DomainMetadataDTO { */ private String generationType; + + public DomainMetadataDTO() { + } + public DomainMetadata toDomainMetadata() { DomainMetadata domainMetadata = new DomainMetadata(); domainMetadata.setId(this.id); @@ -62,4 +68,31 @@ public class DomainMetadataDTO { return domainMetadata; } + public DomainMetadataDTO(DomainMetadata domainMetadata,List erAttributes) { + this.id = domainMetadata.getId(); + this.domainCategoryId = domainMetadata.getDomainCategoryId(); + this.sourceType = domainMetadata.getSourceType(); + this.relation = domainMetadata.getRelation(); + this.targetType = domainMetadata.getTargetType(); + this.generationType = domainMetadata.getGenerationType(); + + for (ErAttribute erAttribute : erAttributes) { + if (StrUtil.equals(erAttribute.getDomainMetadataId(),this.id)){ + if(StrUtil.equals(erAttribute.getErType(),"1")){ + // 节点数据 + if (StrUtil.equals(erAttribute.getAttrName(),this.sourceType)) { + this.sourceAttributes.add(new ERAttributeDTO(erAttribute)); + } + if (StrUtil.equals(erAttribute.getAttrName(),this.targetType)) { + this.targetAttributes.add(new ERAttributeDTO(erAttribute)); + } + }else { + if (StrUtil.equals(erAttribute.getAttrName(),this.relation)) { + this.relationAttributes.add(new ERAttributeDTO(erAttribute)); + } + } + } + } + } + } diff --git a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java index 9dd6a30..6a2c84c 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java @@ -33,6 +33,31 @@ public class ERAttributeDTO { */ private String erType; + + public ERAttributeDTO() { + } + + public ERAttributeDTO(String id, String domainMetadataId, String erName, String attrName, String attrValueType, String erType) { + this.id = id; + this.domainMetadataId = domainMetadataId; + this.erName = erName; + this.attrName = attrName; + this.attrValueType = attrValueType; + this.erType = erType; + } + + public ERAttributeDTO(String attrName) { + this.attrName = attrName; + } + + public ERAttributeDTO(ErAttribute erAttribute) { + this.id = erAttribute.getId(); + this.domainMetadataId = erAttribute.getDomainMetadataId(); + this.attrName = erAttribute.getAttrName(); + this.attrValueType = erAttribute.getAttrValueType(); + this.erType = erAttribute.getErType(); + } + public ErAttribute toErAttribute() { ErAttribute erAttribute = new ErAttribute(); erAttribute.setId(this.id); diff --git a/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java index 720d2b2..5e1c7f6 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/IntentDTO.java @@ -32,6 +32,17 @@ public class IntentDTO { */ private String generationType; + public IntentDTO() { + } + + public IntentDTO(String id, String digest, String desc, String domainCategoryId, String generationType) { + this.id = id; + this.digest = digest; + this.desc = desc; + this.domainCategoryId = domainCategoryId; + this.generationType = generationType; + } + public IntentDTO(Intention intention){ this.id = intention.getId(); this.digest = intention.getDigest(); diff --git a/src/main/java/com/supervision/pdfqaserver/service/DomainCategoryService.java b/src/main/java/com/supervision/pdfqaserver/service/DomainCategoryService.java index 17e99dd..c086f12 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/DomainCategoryService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/DomainCategoryService.java @@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.service; import com.supervision.pdfqaserver.domain.DomainCategory; import com.baomidou.mybatisplus.extension.service.IService; +import java.util.List; /** * @author Administrator @@ -10,4 +11,7 @@ import com.baomidou.mybatisplus.extension.service.IService; */ public interface DomainCategoryService extends IService { + DomainCategory queryByIndustryName(String industryName); + + List listAllIndustryNames(); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java index 7887d2c..673433f 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java @@ -44,4 +44,12 @@ public interface DomainMetadataService extends IService { * @return */ DomainMetadata getByPrimaryKey(String sourceType, String targetType, String relation,String domainCategoryId); + + + /** + * 根据意图ID查询领域元数据 + * @param intentionIds 意图ID + * @return + */ + List listByIntentionIds(List intentionIds); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java b/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java index 98f1547..4c08384 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/ErAttributeService.java @@ -17,4 +17,5 @@ public interface ErAttributeService extends IService { List listByDomainMetadataId(String domainMetadataId); + List listByDomainMetadataIds(List domainMetadataIds); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java b/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java index 0f447e3..ae969d3 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/IntentionDomainMetadataService.java @@ -15,5 +15,7 @@ public interface IntentionDomainMetadataService extends IService listByIntentionId(String intentionId); + List listByIntentionIds(List intentionIds); + void batchSaveIfAbsent(String intentionId, List metadataIds); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java b/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java index 76feb6a..85dffe4 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java +++ b/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java @@ -13,4 +13,12 @@ import java.util.List; public interface PdfAnalysisOutputService extends IService { List queryByPdfId(Integer pdfId); + + /** + * 根据pdfId查询前n个字符串 + * @param pdfId + * @param limit + * @return + */ + String queryByPdfIdAndLimit(Integer pdfId, Integer limit); } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainCategoryServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainCategoryServiceImpl.java index c86a68c..378c019 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainCategoryServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainCategoryServiceImpl.java @@ -5,6 +5,8 @@ import com.supervision.pdfqaserver.domain.DomainCategory; import com.supervision.pdfqaserver.service.DomainCategoryService; import com.supervision.pdfqaserver.mapper.DomainCategoryMapper; import org.springframework.stereotype.Service; +import java.util.List; +import java.util.stream.Collectors; /** * @author Administrator @@ -15,6 +17,17 @@ import org.springframework.stereotype.Service; public class DomainCategoryServiceImpl extends ServiceImpl implements DomainCategoryService{ + @Override + public DomainCategory queryByIndustryName(String industryName) { + return this.lambdaQuery().eq(DomainCategory::getIndustryName, industryName) + .one(); + } + + @Override + public List listAllIndustryNames() { + return super.lambdaQuery().select(DomainCategory::getIndustryName) + .list().stream().map(DomainCategory::getIndustryName).collect(Collectors.toList()); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java index 6349d70..370a292 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java @@ -4,6 +4,8 @@ import cn.hutool.core.collection.CollUtil; import cn.hutool.core.lang.Assert; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; import com.supervision.pdfqaserver.domain.DomainMetadata; +import com.supervision.pdfqaserver.domain.ErAttribute; +import com.supervision.pdfqaserver.domain.IntentionDomainMetadata; import com.supervision.pdfqaserver.dto.DomainMetadataDTO; import com.supervision.pdfqaserver.dto.ERAttributeDTO; import com.supervision.pdfqaserver.service.DomainMetadataService; @@ -14,7 +16,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; - +import java.util.ArrayList; import java.util.List; /** @@ -70,6 +72,7 @@ public class DomainMetadataServiceImpl extends ServiceImpl listByIntentionIds(List intentionIds) { + + List domainMetadataDTOS = new ArrayList<>(); + List intentionDomainMetadataList = intentionDomainMetadataService.listByIntentionIds(intentionIds); + if (CollUtil.isEmpty(intentionDomainMetadataList)){ + return domainMetadataDTOS; + } + List domainMetadataIds = intentionDomainMetadataList.stream().map(IntentionDomainMetadata::getDomainMetadataId).distinct().toList(); + List erAttributes = erAttributeService.listByDomainMetadataIds(domainMetadataIds); + for (IntentionDomainMetadata intentionDomainMetadata : intentionDomainMetadataList) { + DomainMetadata domainMetadata = this.getById(intentionDomainMetadata.getDomainMetadataId()); + domainMetadataDTOS.add(new DomainMetadataDTO(domainMetadata, erAttributes)); + } + return domainMetadataDTOS; + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java index 4d83236..a536aea 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/ErAttributeServiceImpl.java @@ -1,5 +1,6 @@ package com.supervision.pdfqaserver.service.impl; +import cn.hutool.core.collection.CollUtil; import cn.hutool.core.lang.Assert; import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; @@ -8,7 +9,6 @@ import com.supervision.pdfqaserver.service.ErAttributeService; import com.supervision.pdfqaserver.mapper.ErAttributeMapper; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Service; - import java.util.List; /** @@ -39,6 +39,14 @@ public class ErAttributeServiceImpl extends ServiceImpl listByDomainMetadataId(String domainMetadataId) { return super.lambdaQuery().eq(ErAttribute::getDomainMetadataId, domainMetadataId).list(); } + + @Override + public List listByDomainMetadataIds(List domainMetadataIds) { + if (CollUtil.isEmpty(domainMetadataIds)){ + return List.of(); + } + return this.lambdaQuery().in(ErAttribute::getDomainMetadataId, domainMetadataIds).list(); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java index a221352..201ae43 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionDomainMetadataServiceImpl.java @@ -29,6 +29,14 @@ public class IntentionDomainMetadataServiceImpl extends ServiceImpl listByIntentionIds(List intentionIds) { + if (CollUtil.isEmpty(intentionIds)){ + return new ArrayList<>(); + } + return this.lambdaQuery().in(IntentionDomainMetadata::getIntentionId, intentionIds).list(); + } + @Override @Transactional(rollbackFor = Exception.class) public void batchSaveIfAbsent(String intentionId, List metadataIds) { diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java index bf3c69f..9298f33 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/IntentionServiceImpl.java @@ -41,7 +41,7 @@ public class IntentionServiceImpl extends ServiceImplStrUtil.equals(t.getLayoutType(), String.valueOf(LayoutTypeEnum.TEXT.getCode()))).collect(Collectors.toList()); + log.info("只识别文本类型数据,个数:{}", truncateDTOS.size()); + int truncateSize = truncateDTOS.size(); + int index = 1; + int intentSize = 0; + TimeInterval interval = new TimeInterval(); for (TruncateDTO truncateDTO : truncateDTOS) { try { + log.info("正在意图、元数据抽取,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((index*1.0)/truncateSize, 2)); + log.info("开始意图识别,切分文档id:{}", truncateDTO.getId()); + interval.start("makeOutTruncationIntent"); List intents = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO); + log.info("意图识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(),interval.intervalMs("makeOutTruncationIntent")); + if (CollUtil.isEmpty(intents)){ + log.info("切分文档id:{},未正确识别出意图...", truncateDTO.getId()); + continue; + } + log.info("开始意图元数据识别,切分文档id:{}", truncateDTO.getId()); + interval.start("makeOutDomainMetadata"); List domainMetadataDTOS = tripleConversionPipeline.makeOutDomainMetadata(truncateDTO, intents); + log.info("意图元数据识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(),interval.intervalMs("makeOutDomainMetadata")); // 保存意图数据 + intentSize ++; List intentions = intentionService.batchSaveIfAbsent(intents, pdfInfo.getDomainCategoryId(), pdfId.toString()); for (Intention intention : intentions) { List metadataDTOS = domainMetadataDTOS.stream() @@ -151,16 +220,39 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { } } + log.info("意图、元数据抽取完成,耗时:{}秒,一共处理片段数:{}个,抽取出意图数量:{}个", interval.intervalSecond(),truncateSize,intentSize); } - @Override - public void generateGraphBaseTrain(Integer pdfId) { + private void generateGraphBaseTrainExecutor(Integer pdfId){ Assert.notNull(pdfId, "pdfId不能为空"); PdfInfo pdfInfo = pdfInfoService.getByPdfId(pdfId); Assert.notNull(pdfInfo, "pdfId:{}没有找到对应的pdf信息", pdfId); - Assert.isTrue((null !=pdfInfo.getTrainStatus() && pdfInfo.getTrainStatus() == 1), - "pdfId:{}的pdf训练状态:{} 不符合要求", pdfId, pdfInfo.getTrainStatus()); + if (StrUtil.isEmpty(pdfInfo.getContentType())){ + log.info("pdfId:{}没有找到对应的pdf内容类型,开始识别文档内容类型...", pdfId); + DocumentContentTypeEnum documentContentTypeEnum = tripleConversionPipeline.makeOutPdfContentType(pdfId); + if (null == documentContentTypeEnum){ + log.info("pdfId:{}没有找到对应的pdf内容类型,停止后续任务...", pdfId); + return; + } + pdfInfo.setContentType(documentContentTypeEnum.getType()); + pdfInfoService.updateContentType(pdfId, documentContentTypeEnum.getType()); + } + if (null == pdfInfo.getDomainCategoryId()){ + log.info("pdfId:{}没有找到对应的pdf行业,开始识别文档行业...", pdfId); + String industry = tripleConversionPipeline.makeOutPdfIndustry(pdfId); + if (StrUtil.isEmpty(industry)){ + log.info("pdfId:{}没有找到对应的pdf行业,停止后续任务...", pdfId); + return; + } + DomainCategory domainCategory = domainCategoryService.queryByIndustryName(industry); + if (null == domainCategory){ + log.info("pdfId:{}没有找到:{}对应的行业分类,停止后续任务...", pdfId, industry); + return; + } + pdfInfo.setDomainCategoryId(domainCategory.getId()); + pdfInfoService.updateCategory(pdfId, domainCategory.getId()); + } List truncateDTOS = documentTruncationService.listByPdfId(pdfId).stream().map(TruncateDTO::new).collect(Collectors.toList()); TripleConversionPipeline conversionPipeline = this.getTripleConversionPipeline(pdfInfo.getContentType(), pdfInfo.getDomainCategoryId()); @@ -172,7 +264,6 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { documentTruncationService.batchSave(truncateDTOS); log.info("切分数据完成,切分个数:{}", truncateDTOS.size()); } - log.info("开始命名实体识别,切分文档个数:{}", truncateDTOS.size()); // 查询当前行业分类下的意图 List intentionDTOs = intentionService.queryByDomainCategoryId(pdfInfo.getDomainCategoryId()).stream().map(IntentDTO::new).distinct().toList(); if (CollUtil.isEmpty(intentionDTOs)){ @@ -180,14 +271,25 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { return; } + TimeInterval timer = new TimeInterval(); + int index = 1; + int truncateSize = truncateDTOS.size(); + log.info("开始实体关系抽取,耗时:{}秒,一共处理片段数:{}个", timer.intervalSecond(), truncateDTOS.size()); for (TruncateDTO truncateDTO : truncateDTOS) { + log.info("开始命名实体识别,切分文档id:{},识别进度:{}", truncateDTO.getId(), NumberUtil.formatPercent((index*1.0)/truncateSize, 2)); try { + timer.start("makeOutTruncationIntent"); + log.info("开始意图识别,切分文档id:{}", truncateDTO.getId()); List intents = conversionPipeline.makeOutTruncationIntent(truncateDTO,intentionDTOs); + log.info("意图识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(), timer.intervalMs("makeOutTruncationIntent")); if (CollUtil.isEmpty(intents)){ log.info("切分文档id:{},未正确识别出意图...", truncateDTO.getId()); continue; } + log.info("开始命名实体识别,切分文档id:{}", truncateDTO.getId()); + timer.start("doEre"); EREDTO eredto = conversionPipeline.doEre(truncateDTO, intents); + log.info("命名实体识别完成,切分文档id:{},耗时:{}毫秒", truncateDTO.getId(), timer.intervalMs("doEre")); if (null == eredto){ log.info("切分文档id:{},命名实体识别结果为空...", truncateDTO.getId()); continue; @@ -198,7 +300,6 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e); } } - } @Override @@ -357,7 +458,7 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService { return; } // 删除切分数据 - documentTruncationService.deleteByDocumentIds(documentIds); + //documentTruncationService.deleteByDocumentIds(documentIds); for (DocumentTruncation documentTruncation : documentTruncations) { String truncationId = documentTruncation.getId(); // 删除实体数据 diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java index 53d5229..40a46d9 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/OllamaCallServiceImpl.java @@ -14,6 +14,7 @@ public class OllamaCallServiceImpl implements AiCallService { private final OllamaChatModel ollamaChatModel; @Override public String call(String prompt) { - return null; + + return ollamaChatModel.call(prompt); } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java index 4471600..33941bb 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java @@ -6,8 +6,9 @@ import com.supervision.pdfqaserver.domain.PdfAnalysisOutput; import com.supervision.pdfqaserver.service.PdfAnalysisOutputService; import com.supervision.pdfqaserver.mapper.PdfAnalysisOutputMapper; import org.springframework.stereotype.Service; - +import java.util.Comparator; import java.util.List; +import java.util.stream.Collectors; /** * @author Administrator @@ -24,6 +25,20 @@ public class PdfAnalysisOutputServiceImpl extends ServiceImpl pdfAnalysisOutputs = this.queryByPdfId(pdfId); + // 截取前300个字符 + String fullText = pdfAnalysisOutputs.stream().sorted( + Comparator.comparingInt(PdfAnalysisOutput::getPageNo) + .thenComparingInt(PdfAnalysisOutput::getDisplayOrder) + ).map(PdfAnalysisOutput::getContent).collect(Collectors.joining()); + return fullText.substring(0, Math.min(limit, fullText.length())); + } } diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java index 41dbd77..0b2ba83 100644 --- a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java @@ -5,56 +5,207 @@ import cn.hutool.core.lang.Assert; import cn.hutool.core.util.BooleanUtil; import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.StrUtil; +import cn.hutool.json.JSONArray; +import cn.hutool.json.JSONObject; +import cn.hutool.json.JSONUtil; import com.supervision.pdfqaserver.cache.PromptCache; import com.supervision.pdfqaserver.constant.DocumentContentTypeEnum; import com.supervision.pdfqaserver.constant.LayoutTypeEnum; import com.supervision.pdfqaserver.dto.*; -import com.supervision.pdfqaserver.service.TripleConversionPipeline; +import com.supervision.pdfqaserver.service.*; import edu.stanford.nlp.pipeline.CoreDocument; import edu.stanford.nlp.pipeline.CoreSentence; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.ai.ollama.OllamaChatModel; import org.springframework.stereotype.Service; - import java.util.*; import java.util.stream.Collectors; +import static com.supervision.pdfqaserver.cache.PromptCache.*; @Slf4j @Service @RequiredArgsConstructor public class TripleConversionPipelineImpl implements TripleConversionPipeline { - private final OllamaChatModel ollamaChatModel; + private final AiCallService aiCallService; + + private final PdfAnalysisOutputService pdfAnalysisOutputService; + + private final DomainCategoryService domainCategoryService; + + private final DomainMetadataService domainMetadataService; @Override public DocumentContentTypeEnum makeOutPdfContentType(Integer pdfId) { - return null; + Assert.notNull(pdfId, "pdfId不能为空"); + String promptTemplate = PromptCache.promptMap.get(CLASSIFY_CONTENT_TYPE); + + // 截取前300个字符 + String text = pdfAnalysisOutputService.queryByPdfIdAndLimit(pdfId,300); + Assert.notEmpty(text, "text不能为空"); + + Map param = Map.of("text", text, "ContentType", DocumentContentTypeEnum.formatToString()); + String format = StrUtil.format(promptTemplate, param); + log.debug("makeOutPdfContentType:prompt内容:{}", format); + String call = aiCallService.call(format); + log.info("makeOutPdfContentType:响应结果:{}", call); + JSONObject jsonObject = JSONUtil.parseObj(call); + return DocumentContentTypeEnum.getByType(jsonObject.getStr("ContentType")); } @Override public String makeOutPdfIndustry(Integer pdfId) { - return null; + List allIndustryNames = domainCategoryService.listAllIndustryNames(); + Assert.notEmpty(allIndustryNames, "行业名称不能为空"); + String promptTemplate = PromptCache.promptMap.get(CLASSIFY_INDUSTRY); + String text = pdfAnalysisOutputService.queryByPdfIdAndLimit(pdfId, 300); + String format = StrUtil.format(promptTemplate, Map.of("text", text, "industryCategory", String.join(",", allIndustryNames))); + String call = aiCallService.call(format); + log.info("makeOutPdfIndustry:响应结果:{}", call); + JSONObject json = JSONUtil.parseObj(call); + return json.getStr("industryCategory"); } @Override public List makeOutTruncationIntent(TruncateDTO truncate) { - return null; + Assert.notEmpty(truncate.getContent(), "内容不能为空"); + String promptTemplate = PromptCache.promptMap.get(CLASSIFY_INTENT_TRAIN); + Map params = Map.of("text", truncate.getContent()); + String format = StrUtil.format(promptTemplate, params); + String call = aiCallService.call(format); + log.info("makeOutTruncationIntent:响应结果:{}", call); + JSONObject json = JSONUtil.parseObj(call); + JSONArray jsonArray = json.getJSONArray("IntentTypeList"); + return jsonArray.stream().map(Object::toString).toList(); } @Override public List makeOutTruncationIntent(TruncateDTO truncate, List intents) { - return null; + Assert.notEmpty(truncate.getContent(), "内容不能为空"); + Assert.notEmpty(intents, "意图不能为空"); + + String promptTemplate = PromptCache.promptMap.get(CLASSIFY_INTENT); + List digestList = intents.stream().map(IntentDTO::getDigest).toList(); + Map params = Map.of("text", truncate.getContent(), "IntentType", JSONUtil.toJsonStr(digestList)); + String format = StrUtil.format(promptTemplate, params); + String call = aiCallService.call(format); + log.info("makeOutTruncationIntent:响应结果:{}", call); + JSONObject json = JSONUtil.parseObj(call); + JSONArray jsonArray = json.getJSONArray("IntentTypeList"); + return intents.stream().filter(intent-> + jsonArray.stream().anyMatch(o->StrUtil.equals(o.toString(), intent.getDigest()))) + .collect(Collectors.toList()); } @Override public List makeOutDomainMetadata(TruncateDTO truncate,List intents) { - return null; + Assert.notEmpty(truncate.getContent(), "内容不能为空"); + Assert.notEmpty(intents, "意图不能为空"); + + String promptTemplate = promptMap.get(EXTRACT_INTENT_METADATA); + Map params = Map.of("text", truncate.getContent(), "IntentType", JSONUtil.toJsonStr(intents)); + String format = StrUtil.format(promptTemplate, params); + String call = aiCallService.call(format); + log.info("makeOutDomainMetadata:响应结果:{}", call); + return parseDomainMetadata(call); + } + + /** + * [ + * { + * "source": { + * "type": "实体类型1", + * "attributes": ["属性1", "属性2"] + * }, + * "relation": { + * "type": "关系类型", + * "attributes": [] + * }, + * "target": { + * "type": "实体类型2", + * "attributes": ["属性3"] + * }, + * "intent": "匹配的意图标签" + * } + * ] + */ + private List parseDomainMetadata(String jsonStr) { + JSONArray jsonArray = JSONUtil.parseArray(jsonStr); + List domainMetadataDTOS = new ArrayList<>(); + for (int i = 0; i < jsonArray.size(); i++) { + JSONObject jsonObject = jsonArray.getJSONObject(i); + DomainMetadataDTO domainMetadataDTO = new DomainMetadataDTO(); + JSONObject source = jsonObject.getJSONObject("source"); + JSONObject relation = jsonObject.getJSONObject("relation"); + JSONObject target = jsonObject.getJSONObject("target"); + if (null != source){ + String type = source.getStr("type"); + JSONArray attributes = source.getJSONArray("attributes"); + if (StrUtil.isNotEmpty(type)){ + domainMetadataDTO.setSourceType(type); + } + if (CollUtil.isNotEmpty(attributes)){ + List erAttributeDTOS = attributes.stream().map(at -> new ERAttributeDTO(at.toString())).collect(Collectors.toList()); + domainMetadataDTO.setSourceAttributes(erAttributeDTOS); + } + } + if (null != relation){ + String type = relation.getStr("type"); + JSONArray attributes = relation.getJSONArray("attributes"); + if (StrUtil.isNotEmpty(type)){ + domainMetadataDTO.setRelation(type); + } + if (CollUtil.isNotEmpty(attributes)){ + List erAttributeDTOS = attributes.stream().map(at -> new ERAttributeDTO(at.toString())).collect(Collectors.toList()); + domainMetadataDTO.setRelationAttributes(erAttributeDTOS); + } + } + if (null != target){ + String type = target.getStr("type"); + JSONArray attributes = target.getJSONArray("attributes"); + if (StrUtil.isNotEmpty(type)){ + domainMetadataDTO.setTargetType(type); + } + if (CollUtil.isNotEmpty(attributes)){ + List erAttributeDTOS = attributes.stream().map(at -> new ERAttributeDTO(at.toString())).collect(Collectors.toList()); + domainMetadataDTO.setTargetAttributes(erAttributeDTOS); + } + } + domainMetadataDTOS.add(domainMetadataDTO); + } + return domainMetadataDTOS; } @Override public EREDTO doEre(TruncateDTO truncateDTO, List intents) { + if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TEXT.getCode()))){ + if (CollUtil.isEmpty(intents)){ + return doTextEre(truncateDTO); + } + // 查询意图对应的领域元数据 + List intentIds = intents.stream().map(IntentDTO::getId).distinct().collect(Collectors.toList()); + if (CollUtil.isEmpty(intentIds)) { + return null; + } + List domainMetadataDTOS = domainMetadataService.listByIntentionIds(intentIds); + return doTextEreWithMetadata(truncateDTO, domainMetadataDTOS); + } + + if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){ + // 先分析表格是否是描述类型 + Boolean classify = this.classify(truncateDTO.getContent()); + if (null == classify){ + log.info("doEre:表格分类结果为空,切分文档id:{}", truncateDTO.getId()); + return null; + } + if (classify){ + return doTextEre(truncateDTO); + } + + return doTableEre(truncateDTO); + } + log.warn("doEre:错误的布局类型: {}", truncateDTO.getLayoutType()); return null; } @@ -172,25 +323,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { @Override public EREDTO doEre(TruncateDTO truncateDTO) { - if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TEXT.getCode()))){ - return doTextEre(truncateDTO); - } - - if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){ - // 先分析表格是否是描述类型 - Boolean classify = this.classify(truncateDTO.getContent()); - if (null == classify){ - log.info("doEre:表格分类结果为空,切分文档id:{}", truncateDTO.getId()); - return null; - } - if (classify){ - return doTextEre(truncateDTO); - } - - return doTableEre(truncateDTO); - } - log.warn("doEre:错误的布局类型: {}", truncateDTO.getLayoutType()); - return null; + return this.doEre(truncateDTO, new ArrayList<>()); } @Override @@ -209,7 +342,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { String prompt = PromptCache.promptMap.get(PromptCache.CLASSIFY_TABLE); String format = StrUtil.format(prompt, content); - String response = ollamaChatModel.call(format); + String response = aiCallService.call(format); log.info("classify响应结果:{}", response); return BooleanUtil.toBooleanObject(response); } @@ -223,16 +356,86 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { } String table = PromptCache.promptMap.get(PromptCache.EXTRACT_TABLE_TITLE); String format = StrUtil.format(table, content); - String response = ollamaChatModel.call(format); + String response = aiCallService.call(format); tableTitleDTO.setTitle(response); return tableTitleDTO; } + /** + * 文本实体关系抽取 + * @param truncateDTO 切分文档 + * @param domainMetadataDTOS 领域元数据 + * @return + */ + private EREDTO doTextEreWithMetadata(TruncateDTO truncateDTO, List domainMetadataDTOS) { + + Assert.notEmpty(truncateDTO.getContent(), "内容不能为空"); + Assert.notEmpty(domainMetadataDTOS, "意图不能为空"); + + String prompt = promptMap.get(EXTRACT_ERE_BASE_INTENT); + String domainMetadata = metadataToJsonStr(domainMetadataDTOS); + + Map params = Map.of("text", truncateDTO.getContent(), "domainMetadata", domainMetadata); + String format = StrUtil.format(prompt, params); + String call = aiCallService.call(format); + return null; + } + + + /** + * 将领域元数据转换为json字符串 + * @param domainMetadataDTOS domainMetadataDTOS + * @return + */ + private String metadataToJsonStr(List domainMetadataDTOS){ + JSONArray jsa = new JSONArray(); + for (DomainMetadataDTO metadataDTO : domainMetadataDTOS) { + JSONObject metadataJson = new JSONObject(); + JSONObject source = new JSONObject(); + source.set("type", metadataDTO.getSourceType()); + if (metadataDTO.getSourceAttributes() != null) { + JSONArray sourceAttributes = new JSONArray(); + for (ERAttributeDTO attribute : metadataDTO.getSourceAttributes()) { + sourceAttributes.add(attribute.getAttrName()); + } + source.set("attributes", sourceAttributes); + } + metadataJson.set("source", source); + + JSONObject relation = new JSONObject(); + relation.set("type", metadataDTO.getRelation()); + if (metadataDTO.getRelationAttributes() != null) { + JSONArray relationAttributes = new JSONArray(); + for (ERAttributeDTO attribute : metadataDTO.getRelationAttributes()) { + relationAttributes.add(attribute.getAttrName()); + } + relation.set("attributes", relationAttributes); + } + + metadataJson.set("relation", relation); + JSONObject target = new JSONObject(); + target.set("type", metadataDTO.getTargetType()); + if (metadataDTO.getTargetAttributes() != null) { + JSONArray targetAttributes = new JSONArray(); + for (ERAttributeDTO attribute : metadataDTO.getTargetAttributes()) { + targetAttributes.add(attribute.getAttrName()); + } + target.set("attributes", targetAttributes); + } + metadataJson.set("target", target); + jsa.add(metadataJson); + } + + return jsa.toString(); + + + } + private EREDTO doTextEre(TruncateDTO truncateDTO) { log.info("doTextEre:开始进行文本实体关系抽取,内容:{}", truncateDTO.getContent()); String prompt = PromptCache.promptMap.get(PromptCache.DOERE_TEXT); String formatted = StrUtil.format(prompt, truncateDTO.getContent()); - String response = ollamaChatModel.call(formatted); + String response = aiCallService.call(formatted); log.info("doTextEre响应结果:{}", response); return EREDTO.fromTextJson(response, truncateDTO.getId()); } @@ -241,7 +444,7 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline { log.info("doTableEre:开始进行表格实体关系抽取,内容:{}", truncateDTO.getContent()); String prompt = PromptCache.promptMap.get(PromptCache.DOERE_TABLE); String formatted = StrUtil.format(prompt, truncateDTO.getContent()); - String response = ollamaChatModel.call(formatted); + String response = aiCallService.call(formatted); log.info("doTableEre响应结果:{}", response); EREDTO eredto = EREDTO.fromTableJson(response, truncateDTO.getId()); // 手动设置表格标题 diff --git a/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java b/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java index 60c5253..39974ec 100644 --- a/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java +++ b/src/test/java/com/supervision/pdfqaserver/PdfQaServerApplicationTests.java @@ -1,6 +1,9 @@ package com.supervision.pdfqaserver; +import com.supervision.pdfqaserver.constant.DocumentContentTypeEnum; import com.supervision.pdfqaserver.dto.EREDTO; +import com.supervision.pdfqaserver.dto.IntentDTO; +import com.supervision.pdfqaserver.dto.TruncateDTO; import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator; import com.supervision.pdfqaserver.service.KnowledgeGraphService; import com.supervision.pdfqaserver.service.TripleConversionPipeline; @@ -113,5 +116,39 @@ class PdfQaServerApplicationTests { System.out.println(classify); } + @Test + void makeOutPdfContentTypeTest() { + DocumentContentTypeEnum documentContentTypeEnum = tripleConversionPipeline.makeOutPdfContentType(5); + System.out.println(documentContentTypeEnum); + } + + @Test + void makeOutPdfIndustryTest() { + String industry = tripleConversionPipeline.makeOutPdfIndustry(5); + System.out.println(industry); + } + + @Test + void makeOutTruncationIntentTest() { + TruncateDTO truncateDTO = new TruncateDTO(); + truncateDTO.setContent("# 2、同时按照境外会计准则与按照中国会计准则披露的财务报告中净利润和净资产差异情况 \n" + + "\n" + + "□适用 回不适用 "); + List strings = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO); + System.out.println(strings); + } + + @Test + void makeOutTruncationIntentTest2() { + TruncateDTO truncateDTO = new TruncateDTO(); + truncateDTO.setContent("# 2、同时按照境外会计准则与按照中国会计准则披露的财务报告中净利润和净资产差异情况 \n" + + "\n" + + "□适用 回不适用 "); + IntentDTO intentDTO = new IntentDTO(); + intentDTO.setDigest("财务报告差异分析"); + List strings = tripleConversionPipeline.makeOutTruncationIntent(truncateDTO,List.of(intentDTO)); + System.out.println(strings); + } + }