From b9c957a74aa88346c95a0e0a3683175621a3e8b5 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Mon, 28 Apr 2025 09:24:33 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 65 +++--- .../pdfqaserver/PdfQaServerApplication.java | 3 +- .../pdfqaserver/cache/PromptCache.java | 185 ++++++++++++++++++ .../config/ExceptionHandlerConfig.java | 63 ++++++ .../config/MyMetaObjectHandler.java | 26 +++ .../pdfqaserver/config/MybatisPlusConfig.java | 43 ++++ .../constant/ResultStatusEnum.java | 55 ++++++ .../controller/ChatController.java | 5 + .../domain/ChineseEnglishWords.java | 41 ++++ .../domain/DocumentTruncation.java | 58 ++++++ .../pdfqaserver/domain/DomainMetadata.java | 60 ++++++ .../pdfqaserver/domain/PdfAnalysisOutput.java | 60 ++++++ .../pdfqaserver/domain/PdfInfo.java | 39 ++++ .../domain/TruncationEntityExtraction.java | 50 +++++ .../domain/TruncationErAttribute.java | 61 ++++++ .../domain/TruncationRelationExtraction.java | 65 ++++++ .../pdfqaserver/dto/DocumentDTO.java | 38 +++- .../pdfqaserver/dto/DomainMetadataDTO.java | 35 ++++ .../pdfqaserver/dto/ERAttributeDTO.java | 46 +++++ .../supervision/pdfqaserver/dto/EREDTO.java | 124 ++++++++++++ .../pdfqaserver/dto/EntityExtractionDTO.java | 37 ++++ .../dto/RelationExtractionDTO.java | 55 ++++++ .../pdfqaserver/dto/TruncateDTO.java | 33 +++- .../exception/BusinessException.java | 76 +++++++ .../mapper/ChineseEnglishWordsMapper.java | 18 ++ .../mapper/DocumentTruncationMapper.java | 18 ++ .../mapper/DomainMetadataMapper.java | 18 ++ .../mapper/PdfAnalysisOutputMapper.java | 18 ++ .../pdfqaserver/mapper/PdfInfoMapper.java | 18 ++ .../TruncationEntityExtractionMapper.java | 18 ++ .../mapper/TruncationErAttributeMapper.java | 18 ++ .../TruncationRelationExtractionMapper.java | 18 ++ .../service/ChineseEnglishWordsService.java | 13 ++ .../pdfqaserver/service/DocumentSlicer.java | 19 -- 
.../service/DocumentTruncationService.java | 18 ++ .../service/DomainMetadataService.java | 13 ++ .../service/KnowledgeGraphService.java | 21 ++ .../service/PdfAnalysisOutputService.java | 16 ++ .../pdfqaserver/service/PdfInfoService.java | 13 ++ .../service/TripleConversionPipeline.java | 36 ++++ .../service/TripleToCypherExecutor.java | 31 +++ .../TruncationEntityExtractionService.java | 13 ++ .../service/TruncationErAttributeService.java | 13 ++ .../TruncationRelationExtractionService.java | 13 ++ .../impl/ChineseEnglishWordsServiceImpl.java | 22 +++ .../impl/DocumentTruncationServiceImpl.java | 33 ++++ .../impl/DomainMetadataServiceImpl.java | 22 +++ .../impl/KnowledgeGraphServiceImpl.java | 84 ++++++++ .../impl/PdfAnalysisOutputServiceImpl.java | 31 +++ .../service/impl/PdfInfoServiceImpl.java | 22 +++ .../impl/TripleConversionPipelineImpl.java | 105 ++++++++++ .../impl/TripleToCypherExecutorImpl.java | 30 +++ ...TruncationEntityExtractionServiceImpl.java | 22 +++ .../TruncationErAttributeServiceImpl.java | 22 +++ ...uncationRelationExtractionServiceImpl.java | 22 +++ src/main/resources/application.yml | 10 + .../mapper/ChineseEnglishWordsMapper.xml | 18 ++ .../mapper/DocumentTruncationMapper.xml | 23 +++ .../resources/mapper/DomainMetadataMapper.xml | 23 +++ .../mapper/PdfAnalysisOutputMapper.xml | 23 +++ src/main/resources/mapper/PdfInfoMapper.xml | 18 ++ .../TruncationEntityExtractionMapper.xml | 20 ++ .../mapper/TruncationErAttributeMapper.xml | 23 +++ .../TruncationRelationExtractionMapper.xml | 24 +++ 64 files changed, 2220 insertions(+), 63 deletions(-) create mode 100644 src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java create mode 100644 src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java create mode 100644 src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java create mode 100644 src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java create mode 100644 
src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java create mode 100644 src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java create mode 100644 src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java create mode 100644 
src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java delete mode 100644 src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/TripleToCypherExecutor.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/TruncationEntityExtractionService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/TruncationErAttributeService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java create mode 100644 
src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java create mode 100644 src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java create mode 100644 src/main/resources/mapper/ChineseEnglishWordsMapper.xml create mode 100644 src/main/resources/mapper/DocumentTruncationMapper.xml create mode 100644 src/main/resources/mapper/DomainMetadataMapper.xml create mode 100644 src/main/resources/mapper/PdfAnalysisOutputMapper.xml create mode 100644 src/main/resources/mapper/PdfInfoMapper.xml create mode 100644 src/main/resources/mapper/TruncationEntityExtractionMapper.xml create mode 100644 src/main/resources/mapper/TruncationErAttributeMapper.xml create mode 100644 src/main/resources/mapper/TruncationRelationExtractionMapper.xml diff --git a/pom.xml b/pom.xml index 40897ad..34ad1f6 100644 --- a/pom.xml +++ b/pom.xml @@ -23,15 +23,32 @@ org.springframework.boot spring-boot-starter-web + + org.springframework.boot + spring-boot-starter-aop + org.springframework.ai spring-ai-starter-model-ollama - + + com.alibaba + druid-spring-boot-3-starter + 1.2.21 + + + com.baomidou + mybatis-plus-spring-boot3-starter + 3.5.7 + + + com.baomidou + mybatis-plus-boot-starter + 3.5.5 + org.postgresql postgresql - runtime org.projectlombok @@ -48,6 +65,21 @@ hutool-all 5.8.26 + + com.alibaba + fastjson + 1.2.83_noneautotype + + + com.fasterxml.jackson.core + jackson-core + 2.15.3 + + + com.fasterxml.jackson.core + jackson-databind + 2.15.3 + @@ -61,33 +93,4 @@ - - - - - - diff --git 
a/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java b/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java index eafc706..bd11d75 100644 --- a/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java +++ b/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java @@ -1,9 +1,10 @@ package com.supervision.pdfqaserver; +import org.mybatis.spring.annotation.MapperScan; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; - +@MapperScan(basePackages = {"com.supervision.pdfqaserver.mapper"}) @SpringBootApplication public class PdfQaServerApplication { diff --git a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java new file mode 100644 index 0000000..25e2613 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java @@ -0,0 +1,185 @@ +package com.supervision.pdfqaserver.cache; + +import java.util.HashMap; +import java.util.Map; + +/** + * 提示词缓存 + */ +public class PromptCache { + + public static final String DOERE_TEXT = "DOERE_TEXT"; + public static final String DOERE_TABLE = "DOERE_TABLE"; + public static final Map promptMap = new HashMap<>(); + + static { + init(); + } + private static void init(){ + promptMap.put(DOERE_TEXT, DOERE_TEXT_PROMPT); + promptMap.put(DOERE_TABLE, DOERE_TABLE_PROMPT); + } + + + + private static final String DOERE_TEXT_PROMPT = """ + 你是一个高级信息抽取引擎,请从给定文本中提取以下结构化信息并以JSON格式输出: + + 1. **节点提取**: + - 识别所有实体作为节点 + - 自动推断每个节点的类型 + - 记录节点的所有相关属性(键值对形式) + + 2. **关系提取**: + - 识别所有节点间的关系 + - 自动推断关系类型 + - 记录关系的所有相关属性(键值对形式) + + 3. 
**类型化三元组**: + - 生成由 (头节点类型, 关系类型, 尾节点类型) 组成的元组 + + **输出要求**: + - 使用如下JSON Schema: + + { + "nodes": [ + { + "name": "节点名称", + "type": "节点类型", + "attributes": { + "属性名1": "属性值1", + "属性名2": "属性值2" + } + } + ], + "relations": [ + { + "source": "头节点名称", + "target": "尾节点名称", + "type": "关系类型", + "attributes": { + "关系属性名1": "关系属性值1" + } + } + ], + "typed_triplets": [ + ["头节点类型", "关系类型", "尾节点类型"] + ] + } + + + **处理规则**: + 1. 节点类型和关系类型由你根据上下文语义自动创建(如"科学家"/"发明"/"研究所") + 2. 属性字段应包含文本中明确提及或可推导的特征(如数值、时间、状态等) + 3. 对同一实体的不同指代需进行合并(如"特斯拉"和"埃隆·马斯克的公司") + + **示例文本**: + "爱因斯坦在1905年发表了狭义相对论论文,这篇革命性理论后来被普林斯顿高等研究院深入研究" + + **期望输出**: + + { + "nodes": [ + { + "name": "爱因斯坦", + "type": "物理学家", + "attributes": { + "领域": "理论物理" + } + }, + { + "name": "狭义相对论", + "type": "科学理论", + "attributes": { + "发表年份": 1905, + "重要性": "革命性" + } + }, + { + "name": "普林斯顿高等研究院", + "type": "科研机构", + "attributes": { + "研究领域": "理论科学" + } + } + ], + "relations": [ + { + "source": "爱因斯坦", + "target": "狭义相对论", + "type": "发表", + "attributes": { + "时间": 1905 + } + }, + { + "source": "普林斯顿高等研究院", + "target": "狭义相对论", + "type": "研究", + "attributes": { + "强度描述": "深入" + } + } + ], + "typed_triplets": [ + ["物理学家", "发表", "科学理论"], + ["科研机构", "研究", "科学理论"] + ] + } + + 请处理以下文本: + {} + """; + + private static final String DOERE_TABLE_PROMPT = """ + 你是一个表格数据处理专家,请严格按以下要求从给出的表格中提取数据: + + **处理规则:** + 1. 完全保留原始表头字段名称,不做任何中英文转换或修改 + 2. 将每行数据转换为一个独立对象 + 3. 所有数值保留原始格式(包括逗号分隔符和小数点) + 4. 表格第一列作为主键字段 + + **输出格式:** + ```json + { + "table_data": [ + { + "[第一列表头]": "[第一列值]", + "[第二列表头]": "[第二列值]", + "[第三列表头]": "[第三列值]" + }, + // 后续行... 
+ ] + } + ``` + + **示例表格:** + | 账龄 | 期末余额 | 年初余额 | + | --- | --- | --- | + | 1年以内 | 310,844,201.27 | 337,641,834.84 | + | 1至2年 | 52,374,904.35 | 15,041,750.36 | + + **期望输出:** + + { + "table_data": [ + { + "账龄": "1年以内", + "期末余额": "310,844,201.27", + "年初余额": "337,641,834.84" + }, + { + "账龄": "1至2年", + "期末余额": "52,374,904.35", + "年初余额": "15,041,750.36" + } + ] + } + + 请处理以下表格: + {} + """; + + +} diff --git a/src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java b/src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java new file mode 100644 index 0000000..47d2be2 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java @@ -0,0 +1,63 @@ +package com.supervision.pdfqaserver.config; + +import com.supervision.pdfqaserver.constant.ResultStatusEnum; +import com.supervision.pdfqaserver.dto.R; +import com.supervision.pdfqaserver.exception.BusinessException; +import lombok.extern.slf4j.Slf4j; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.bind.annotation.RestControllerAdvice; +import org.springframework.web.multipart.MaxUploadSizeExceededException; + +/** + * 统一异常处理器配置 + * + * @author wb + * @date 2022/3/10 13:24 + */ +@Slf4j +@Configuration +@RestControllerAdvice(annotations = RestController.class, basePackages = {"com.supervision.ai.service.**.controller"}) +public class ExceptionHandlerConfig { + + /** + * 添加手动校验参数的异常处理 + * + * @param exception 参数验证异常 + * @return 通用返回值 + */ + @ExceptionHandler(IllegalArgumentException.class) + public R manualValidationExceptionResponse(IllegalArgumentException exception) { + log.error("=========手动校验参数异常=========>>>"); + log.error(exception.getMessage(), exception); + log.error("<<<=========手动校验参数异常========="); + return R.fail(ResultStatusEnum.ILLEGAL_ARGUMENT.getCode(), 
exception.getMessage()); + } + + @ExceptionHandler(BusinessException.class) + public R businessExceptionResponse(BusinessException exception) { + log.error("=========运行异常=========>>>"); + log.error(exception.getMessage(), exception); + log.error("<<<=========运行异常========="); + + return R.fail(511, exception.getMessage()); + } + + @ExceptionHandler(RuntimeException.class) + public R manualValidationExceptionResponse(RuntimeException exception) { + log.error("=========运行异常=========>>>"); + log.error(exception.getMessage(), exception); + log.error("<<<=========运行异常========="); + + return R.fail(ResultStatusEnum.RUNTIME_EXCEPTION.getCode(), exception.getMessage()); + } + + @ExceptionHandler(MaxUploadSizeExceededException.class) + public R handleMaxSizeException(MaxUploadSizeExceededException exception) { + log.error("=========文件大小超出限制异常=========>>>"); + log.error(exception.getMessage(), exception); + log.error("<<<=========文件大小超出限制异常========="); + return R.fail(ResultStatusEnum.EXCEED_FILE_SIZE.getCode(), exception.getMessage()); + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java b/src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java new file mode 100644 index 0000000..d192a9c --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java @@ -0,0 +1,26 @@ +package com.supervision.pdfqaserver.config; + +import com.baomidou.mybatisplus.core.handlers.MetaObjectHandler; +import org.apache.ibatis.reflection.MetaObject; + +import java.time.LocalDateTime; + +/** + * @author Ray + */ +public class MyMetaObjectHandler implements MetaObjectHandler { + public MyMetaObjectHandler() { + } + + @Override + public void insertFill(MetaObject metaObject) { + this.setFieldValByName("createTime", LocalDateTime.now(), metaObject); + this.setFieldValByName("updateTime", LocalDateTime.now(), metaObject); + } + + @Override + public void updateFill(MetaObject metaObject) { + 
this.setFieldValByName("updateTime", LocalDateTime.now(), metaObject); + } + +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java b/src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java new file mode 100644 index 0000000..b72981c --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java @@ -0,0 +1,43 @@ +package com.supervision.pdfqaserver.config; + +import com.baomidou.mybatisplus.annotation.DbType; +import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor; +import com.baomidou.mybatisplus.extension.plugins.inner.PaginationInnerInterceptor; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * MybatisPlus配置 + * + * @author qmy + * @version 1.0.0 2020/10/22 9:47 + * @since JDK1.8 + */ +@Configuration +public class MybatisPlusConfig { + + @Bean + public MyMetaObjectHandler myMetaObjectHandler() { + return new MyMetaObjectHandler(); + } + + /** + * 拦截器配置 + */ + @Bean + public MybatisPlusInterceptor mybatisPlusInterceptor() { + MybatisPlusInterceptor interceptor = new MybatisPlusInterceptor(); + interceptor.addInnerInterceptor(this.paginationInterceptor()); + return interceptor; + } + + private PaginationInnerInterceptor paginationInterceptor() { + PaginationInnerInterceptor paginationInterceptor = new PaginationInnerInterceptor(); + paginationInterceptor.setOverflow(false); + /** + * 注意! 此处要设置数据库类型. 
+ */ + paginationInterceptor.setDbType(DbType.POSTGRE_SQL); + return paginationInterceptor; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java b/src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java new file mode 100644 index 0000000..471ef1e --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java @@ -0,0 +1,55 @@ +package com.supervision.pdfqaserver.constant; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** + * 响应结果状态枚举类 + * @author qimaoyu + * @create 2019-07-14 10:22 + */ +@NoArgsConstructor +@AllArgsConstructor +public enum ResultStatusEnum { + + AUTHENTICATION_FAILED(320, "token失效,请重新登录!"), + NO_ACCESS_TO_THIS_INTERFACE(320, "无权访问此接口!"), + FAILED_TO_GENERATE_TOKEN(321, "生成token失败!"), + ACCOUNT_PASSWORD_INCORRECT(322, "账号或密码错误!"), + ACCOUNT_NOT_CREATE(323, "账号未创建!"), + HAS_BEEN_PULLED_BLACK(324, "已被删除或禁用,无法登录!"), + USERNAME_MAIL_IS_EXIST(341, "登录名称已经被注册!"), + USERNAME_IS_BLANK(342, "登录名称为空!"), + VERIFICATION_CODE_EXPIRED(350,"验证码已过期,请重新获取。"), + VERIFICATION_CODE_FAILURE(351,"验证码输入错误。"), + OPERATE_FAIL(360,"修改毕业生信息失败。"), + DATA_IS_EMPTY(370,"查询到的结果为空"), + SYSTEM_ABNORMAL(500, "系统繁忙,请稍后重试!"), + UPLOAD_EXCEPTION(501, "文件上传异常!"), + EXPORT_EXCEPTION(502, "文件导出异常!"), + INCORRECT_FILE_FORMAT(503, "文件格式不正确!"), + PARAMETER_CANNOT_BE_EMPTY(504, "参数不能为空,操作失败!"), + NO_TEMP_UPLOADFILEPATH(505,"未配置文件上传临时存储路径"), + USER_DOES_NOT_EXIST(507, "用户不存在,操作失败!"), + + ILLEGAL_ARGUMENT(508, "参数校验失败!"), + RUNTIME_EXCEPTION(509, "程序运行异常!"), + EXCEED_FILE_SIZE(510, "文件大小超出限制!"), + IMPORT_COMPANY_FORMAT_ERROR(521,"Excel表格格式错误!"), + IMPORT_COMPANY_FAIL(522,"部分数据导入失败"), + INSERT_FAIL(600,"新增失败"), + DuplicateKeyException(601,"该条信息已经存在,请勿重复添加"), + UPDATE_FAIL(700,"更新失败"), + DELETE_FAIL(800,"删除失败"), + YEAR_IS_CLOSE(1001,"该年度暂未开启"); + + @Getter + @Setter + private int code; + + @Getter + @Setter + private String message; +} 
diff --git a/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java b/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java index 3af4fd9..598fdab 100644 --- a/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java +++ b/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java @@ -23,6 +23,11 @@ public class ChatController { private final OllamaChatModel ollamaChatModel; + /** + * 仅供调试使用,后期移除该接口 + * @param message + * @return + */ @PostMapping("/chat") public R pageList(@RequestBody Map message) { List messages = new ArrayList<>(); diff --git a/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java b/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java new file mode 100644 index 0000000..6d11b7c --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java @@ -0,0 +1,41 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; + +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * 中英文对照字典 + * @TableName chinese_english_words + */ +@TableName(value ="chinese_english_words") +@Data +public class ChineseEnglishWords implements Serializable { + /** + * 中文 + */ + @TableId + private String chineseWord; + + /** + * 英文 + */ + private String englishWord; + + /** + * 创建时间 + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + /** + * 更新时间 + */ + @TableField(fill = FieldFill.INSERT_UPDATE) + private LocalDateTime updateTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java b/src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java new file mode 100644 index 0000000..4145bc2 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java 
@@ -0,0 +1,58 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; + +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * 文档切分表 + * @TableName document_truncation + */ +@TableName(value ="document_truncation") +@Data +public class DocumentTruncation implements Serializable { + /** + * + */ + @TableId + private String id; + + /** + * 文档id(pdf_info表的id) + */ + private Integer documentId; + + /** + * 段落id pdf_analysis_output表的id + */ + private String sectionId; + + /** + * 布局类型 0-文本 1-表格 + */ + private String layoutType; + + private String title; + + /** + * 片段内容 + */ + private String content; + + /** + * 创建时间 + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + /** + * 更新时间 + */ + @TableField(fill = FieldFill.INSERT_UPDATE) + private LocalDateTime updateTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java b/src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java new file mode 100644 index 0000000..91695fd --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java @@ -0,0 +1,60 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * 领域元数据 + * @TableName domain_metadata + */ +@TableName(value ="domain_metadata") +@Data +public class DomainMetadata implements Serializable { + /** + * + */ + @TableId + private String id; + + /** + * 领域类型 + */ + private String domainType; + + /** + * 头节点类型 + */ + private String sourceType; + + /** + * 关系 + */ + private String relation; + + /** + * 尾节点类型 + */ + private String targetType; + + /** + * 数据来源:0=手动录入,1=系统自动 + */ + private String generationType; + + /** + * 创建时间 + */ + @TableField(fill = 
FieldFill.INSERT) + private LocalDateTime createTime; + + /** + * 更新时间 + */ + @TableField(fill = FieldFill.INSERT_UPDATE) + private LocalDateTime updateTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java b/src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java new file mode 100644 index 0000000..4f60a67 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java @@ -0,0 +1,60 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; + +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * + * @TableName pdf_analysis_output + */ +@TableName(value ="pdf_analysis_output") +@Data +public class PdfAnalysisOutput implements Serializable { + /** + * + */ + @TableId + private Integer id; + + /** + * 0-文本 1-表格 + */ + private Integer layoutType; + + /** + * pdf段落内容 + */ + private String content; + + /** + * pdf页码 + */ + private Integer pageNo; + + /** + * pdf_info表的主键 + */ + private Integer pdfId; + + /** + * 表格标题 + */ + private String tableTitle; + + /** + * 内容在pdf页面中的顺序,越小表示顺序越靠前 + */ + private Integer order; + + /** + * + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java b/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java new file mode 100644 index 0000000..e094002 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java @@ -0,0 +1,39 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; + +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * pdf信息 + * @TableName 
pdf_info + */ +@TableName(value ="pdf_info") +@Data +public class PdfInfo implements Serializable { + /** + * + */ + private Integer id; + + /** + * pdf路径 + */ + private String path; + + /** + * 文件名 + */ + private String filename; + + /** + * 创建时间 + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java new file mode 100644 index 0000000..6d2fb49 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java @@ -0,0 +1,50 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * 片段实体抽取 + * @TableName truncation_entity_extraction + */ +@TableName(value ="truncation_entity_extraction") +@Data +public class TruncationEntityExtraction implements Serializable { + /** + * 主键 + */ + @TableId + private String id; + + /** + * 片段id document_truncation表的id + */ + private String truncationId; + + /** + * 标签(实体类型) + */ + private String entity; + + /** + * 实体名 + */ + private String name; + + /** + * 创建时间 + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + /** + * 更新时间 + */ + @TableField(fill = FieldFill.INSERT_UPDATE) + private LocalDateTime updateTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java new file mode 100644 index 0000000..ced3840 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java @@ 
-0,0 +1,61 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; + +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * 实体表 + * @TableName truncation_er_attribute + */ +@TableName(value ="truncation_er_attribute") +@Data +public class TruncationErAttribute implements Serializable { + /** + * + */ + @TableId + private String id; + + /** + * 片段实体属性表 既可以是truncation_entity_extraction表id也可以是truncation_relation_extraction表id + */ + private String terId; + + /** + * 类型 0:terId关联的id为实体 1:terId关联的id为关系 + */ + private String type; + + /** + * 实体名 + */ + private String attribute; + + /** + * 值 + */ + private String value; + + /** + * 数据类型 0:字符串 1:数字 + */ + private String dataType; + + /** + * 创建时间 + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + /** + * 更新时间 + */ + @TableField(fill = FieldFill.INSERT_UPDATE) + private LocalDateTime updateTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java new file mode 100644 index 0000000..9471d89 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java @@ -0,0 +1,65 @@ +package com.supervision.pdfqaserver.domain; + +import com.baomidou.mybatisplus.annotation.*; +import java.io.Serializable; +import java.time.LocalDateTime; +import lombok.Data; + +/** + * 片段关系抽取 + * @TableName truncation_relation_extraction + */ +@TableName(value ="truncation_relation_extraction") +@Data +public class TruncationRelationExtraction implements Serializable { + /** + * + */ + @TableId + private String id; + + /** + * + */ + private String truncationId; + + /** + * 头节点 + */ + private String source; + + /** + * 头节点类型 + */ + private String sourceType; + + /** 
+ * 尾节点 + */ + private String target; + + /** + * 尾节点类型 + */ + private String targetType; + + /** + * 关系 + */ + private String relation; + + /** + * 创建时间 + */ + @TableField(fill = FieldFill.INSERT) + private LocalDateTime createTime; + + /** + * 更新时间 + */ + @TableField(fill = FieldFill.INSERT_UPDATE) + private LocalDateTime updateTime; + + @TableField(exist = false) + private static final long serialVersionUID = 1L; +} \ No newline at end of file diff --git a/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java index deb4096..c3af439 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java @@ -1,5 +1,6 @@ package com.supervision.pdfqaserver.dto; +import com.supervision.pdfqaserver.domain.PdfAnalysisOutput; import lombok.Data; /** @@ -7,19 +8,29 @@ import lombok.Data; */ @Data public class DocumentDTO { + /** * 文档id */ private String id; + + private Integer documentId; + + private Integer sectionId; + + private Integer pageNo; + /** - * 文档序号 + * 内容类型 0:文本 1:表格 */ - private Integer index; + private String layoutType; /** - * 内容类型 0:文本 1:表格 + * 内容在pdf页面中的顺序,越小表示顺序越靠前 */ - private String type; + private Integer layoutOrder; + + private String title; /** * 文档内容 @@ -32,10 +43,21 @@ public class DocumentDTO { private Integer pageNum; - /** - * 文件名 - */ - private String fileName; + public DocumentDTO() { + } + + public DocumentDTO(PdfAnalysisOutput pdfAnalysisOutput) { + this.sectionId = pdfAnalysisOutput.getId(); + this.documentId = pdfAnalysisOutput.getPdfId(); + if (null != pdfAnalysisOutput.getLayoutType()) { + this.layoutType = pdfAnalysisOutput.getLayoutType().toString(); + } + this.pageNo = pdfAnalysisOutput.getPageNo(); + this.title = pdfAnalysisOutput.getTableTitle(); + this.content = pdfAnalysisOutput.getContent(); + this.layoutOrder = pdfAnalysisOutput.getOrder(); + + } } diff --git 
/**
 * Attribute of an extracted entity or relation.
 */
@Data
public class ERAttributeDTO {

    private String id;

    /**
     * Id of the owning record; may be an id from truncation_entity_extraction or an id from
     * truncation_relation_extraction (see {@link #type}).
     */
    private String terId;

    /**
     * Owner kind. 0: terId refers to an entity, 1: terId refers to a relation.
     */
    private String type;

    /**
     * Attribute name.
     */
    private String attribute;

    /**
     * Attribute value.
     */
    private String value;

    /**
     * Data type of the value. 0: string, 1: number.
     */
    private String dataType;

    public ERAttributeDTO() {
    }

    /**
     * Creates an attribute that is not yet linked to an owner: id, terId and type stay unset.
     *
     * @param attribute attribute name
     * @param value     attribute value
     * @param dataType  data type flag (0: string, 1: number)
     */
    public ERAttributeDTO(String attribute, String value, String dataType) {
        this.attribute = attribute;
        this.value = value;
        this.dataType = dataType;
    }
}
com.supervision.pdfqaserver.dto; + +import cn.hutool.core.collection.CollUtil; +import cn.hutool.core.util.StrUtil; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; + +import java.util.*; + +/** + * 实体关系抽取 + */ +@Slf4j +@Data +public class EREDTO { + + private List entities; + + private List relations; + + public EREDTO() { + } + + public static EREDTO fromTextJson(String json,String truncationId) { + EREDTO eredto = new EREDTO(); + JSONObject jsonObject = JSONObject.parseObject(json); + JSONArray nodes = jsonObject.getJSONArray("nodes"); + JSONArray relations = jsonObject.getJSONArray("relations"); + List entities = new ArrayList<>(); + List relationsList = new ArrayList<>(); + if (CollUtil.isNotEmpty(nodes)){ + for (Object node : nodes) { + JSONObject nodeJson = (JSONObject) node; + String name = nodeJson.getString("name"); + String type = nodeJson.getString("type"); + JSONObject attributes = nodeJson.getJSONObject("attributes"); + if (CollUtil.isNotEmpty(attributes)){ + List erAttributeDTOS = new ArrayList<>(); + for (String key : attributes.keySet()) { + Object value = attributes.get(key); + String valueString = attributes.getString(key); + ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); + erAttributeDTOS.add(erAttributeDTO); + } + EntityExtractionDTO entityExtraction = new EntityExtractionDTO(truncationId,name,type, erAttributeDTOS); + entities.add(entityExtraction); + } + } + } + if (CollUtil.isNotEmpty(relations)){ + for (Object relation : relations) { + JSONObject relationJson = (JSONObject) relation; + String source = relationJson.getString("source"); + String target = relationJson.getString("target"); + String type = relationJson.getString("type"); + JSONObject attributes = relationJson.getJSONObject("attributes"); + if (CollUtil.isNotEmpty(attributes)){ + List erAttributeDTOS = new ArrayList<>(); + for (String key 
: attributes.keySet()) { + Object value = attributes.get(key); + String valueString = attributes.getString(key); + ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, valueString, value instanceof Number?"1":"0"); + erAttributeDTOS.add(erAttributeDTO); + } + if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)){ + log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson); + continue; + } + Optional sourceTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), source)).findFirst(); + if (sourceTypeOpt.isEmpty()){ + log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson); + continue; + } + Optional targetTypeOpt = entities.stream().filter(e -> StrUtil.equals(e.getEntity(), target)).findFirst(); + if (targetTypeOpt.isEmpty()){ + log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson); + continue; + } + RelationExtractionDTO relationExtractionDTO = new RelationExtractionDTO(truncationId,source, + sourceTypeOpt.get().getEntity(),type,target,targetTypeOpt.get().getEntity(), erAttributeDTOS); + relationsList.add(relationExtractionDTO); + } + } + } + eredto.setEntities(entities); + eredto.setRelations(relationsList); + return eredto; + } + + public static EREDTO fromTableJson(String json,String truncationId) { + + EREDTO eredto = new EREDTO(); + JSONObject jsonObject = JSONObject.parseObject(json); + JSONArray tables = jsonObject.getJSONArray("table_data"); + + if (CollUtil.isEmpty(tables)){ + return eredto; + } + List entities = new ArrayList<>(); + for (Object table : tables) { + JSONObject tableJson = (JSONObject) table; + if (CollUtil.isEmpty(tableJson)){ + continue; + } + EntityExtractionDTO entityExtractionDTO = new EntityExtractionDTO(); + entityExtractionDTO.setEntity("row"); + entityExtractionDTO.setName("row"); + entityExtractionDTO.setTruncationId(truncationId); + List erAttributeDTOS = new ArrayList<>(); + for (Map.Entry tableEntry : 
tableJson.entrySet()) { + String key = tableEntry.getKey(); + Object value = tableEntry.getValue(); + ERAttributeDTO erAttributeDTO = new ERAttributeDTO(key, value.toString(), value instanceof Number ? "1" : "0"); + erAttributeDTOS.add(erAttributeDTO); + } + entityExtractionDTO.setAttributes(erAttributeDTOS); + entities.add(entityExtractionDTO); + } + eredto.setEntities(entities); + return eredto; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java new file mode 100644 index 0000000..30cb118 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java @@ -0,0 +1,37 @@ +package com.supervision.pdfqaserver.dto; + +import lombok.Data; +import java.util.List; + +/** + * 实体抽取 + */ +@Data +public class EntityExtractionDTO { + + private String id; + + private String truncationId; + + /** + * 实体标签 + */ + private String entity; + + /** + * 实体名 + */ + private String name; + + private List attributes; + + public EntityExtractionDTO() { + } + + public EntityExtractionDTO(String truncationId, String entity, String name, List attributes) { + this.truncationId = truncationId; + this.entity = entity; + this.name = name; + this.attributes = attributes; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java new file mode 100644 index 0000000..1b35063 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java @@ -0,0 +1,55 @@ +package com.supervision.pdfqaserver.dto; + +import lombok.Data; +import java.util.List; + +/** + * 关系抽取 + */ +@Data +public class RelationExtractionDTO { + + private String id; + + private String truncationId; + + /** + * 头节点数据 + */ + private String source; + + /** + * 头节点类型 + */ + private String sourceType; + + /** + *关系 + */ + private String relation; + + /** + * 尾节点数据 + */ + 
private String target; + + /** + * 尾节点类型 + */ + private String targetType; + + private List attributes; + + public RelationExtractionDTO() { + } + + public RelationExtractionDTO(String truncationId,String source, String sourceType,String relation, String target,String targetType, List attributes) { + this.truncationId = truncationId; + this.source = source; + this.relation = relation; + this.target = target; + this.attributes = attributes; + this.sourceType = sourceType; + this.targetType = targetType; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java index 1c27e52..db4a5d2 100644 --- a/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java +++ b/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java @@ -1,5 +1,6 @@ package com.supervision.pdfqaserver.dto; +import com.supervision.pdfqaserver.domain.DocumentTruncation; import lombok.Data; /** @@ -14,19 +15,43 @@ public class TruncateDTO { private String id; /** - * 分段类型 0:文本 1:表格 + * 布局类型 0-文本 1-表格 */ - private String type; + private String layoutType; /** - * 分段内容 + * 文档id(pdf_info表的id) */ - private String content; + private Integer documentId; + + /** + * 段落id pdf_analysis_output表的id + */ + private String sectionId; /** * 表格标题 */ private String title; + /** + * 分段内容 + */ + private String content; + + + public DocumentTruncation toDocumentTruncation() { + DocumentTruncation truncation = new DocumentTruncation(); + truncation.setDocumentId(this.documentId); + truncation.setSectionId(this.sectionId); + truncation.setLayoutType(this.layoutType); + truncation.setTitle(this.title); + truncation.setContent(this.content); + return truncation; + } + + + + } diff --git a/src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java b/src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java new file mode 100644 index 0000000..ff1654d --- /dev/null +++ 
b/src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java @@ -0,0 +1,76 @@ +/* + * 文 件 名: CustomException + * 版 权: + * 描 述: <描述> + * 修 改 人: RedName + * 修改时间: 2022/8/5 + * 跟踪单号: <跟踪单号> + * 修改单号: <修改单号> + * 修改内容: <修改内容> + */ +package com.supervision.pdfqaserver.exception; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpStatus; + +/** + * <功能详细描述> + * 自定义异常 + * + * @author ljt + * @version [版本号, 2022/8/5] + * @see [相关类/方法] + * @since [产品/模块版本] + */ +@Slf4j +public class BusinessException extends RuntimeException { + /** + * 异常编码 + */ + private final Integer code; + + /** + * 异常信息 + */ + private final String message; + + public BusinessException(Throwable cause) { + super(cause); + this.code = HttpStatus.INTERNAL_SERVER_ERROR.value(); + this.message = null; + + } + + public BusinessException(Throwable cause, String message) { + super(cause); + this.code = HttpStatus.INTERNAL_SERVER_ERROR.value(); + this.message = message; + + } + + public BusinessException(String message) { + this.code = HttpStatus.INTERNAL_SERVER_ERROR.value(); + this.message = message; + } + + public BusinessException(String message, Integer code) { + this.message = message; + this.code = code; + } + + public BusinessException(String message, Throwable e) { + super(message, e); + log.error(message, e); + this.code = HttpStatus.INTERNAL_SERVER_ERROR.value(); + this.message = message; + } + + @Override + public String getMessage() { + return message; + } + + public Integer getCode() { + return code; + } +} diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java new file mode 100644 index 0000000..e477948 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.ChineseEnglishWords; +import 
com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【chinese_english_words(中英文对照字典)】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.ChineseEnglishWords +*/ +public interface ChineseEnglishWordsMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java new file mode 100644 index 0000000..92c228c --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.DocumentTruncation; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【document_truncation(文档切分表)】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.DocumentTruncation +*/ +public interface DocumentTruncationMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java new file mode 100644 index 0000000..a48a7ee --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.DomainMetadata; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【domain_metadata(领域元数据)】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.DomainMetadata +*/ +public interface DomainMetadataMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java 
b/src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java new file mode 100644 index 0000000..f2a1f49 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.PdfAnalysisOutput; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【pdf_analysis_output】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.PdfAnalysisOutput +*/ +public interface PdfAnalysisOutputMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java new file mode 100644 index 0000000..acf6944 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.PdfInfo; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【pdf_info(pdf信息)】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.PdfInfo +*/ +public interface PdfInfoMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java new file mode 100644 index 0000000..e451a27 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.TruncationEntityExtraction; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【truncation_entity_extraction(片段实体抽取)】的数据库操作Mapper 
+* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.TruncationEntityExtraction +*/ +public interface TruncationEntityExtractionMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java new file mode 100644 index 0000000..90483be --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.TruncationErAttribute; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【truncation_er_attribute(实体表)】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.TruncationErAttribute +*/ +public interface TruncationErAttributeMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java new file mode 100644 index 0000000..7ccc5c5 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.mapper; + +import com.supervision.pdfqaserver.domain.TruncationRelationExtraction; +import com.baomidou.mybatisplus.core.mapper.BaseMapper; + +/** +* @author Administrator +* @description 针对表【truncation_relation_extraction(片段关系抽取)】的数据库操作Mapper +* @createDate 2025-04-27 11:45:24 +* @Entity com.supervision.pdfqaserver.domain.TruncationRelationExtraction +*/ +public interface TruncationRelationExtractionMapper extends BaseMapper { + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java 
b/src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java new file mode 100644 index 0000000..3e25515 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java @@ -0,0 +1,13 @@ +package com.supervision.pdfqaserver.service; + +import com.supervision.pdfqaserver.domain.ChineseEnglishWords; +import com.baomidou.mybatisplus.extension.service.IService; + +/** +* @author Administrator +* @description 针对表【chinese_english_words(中英文对照字典)】的数据库操作Service +* @createDate 2025-04-27 11:45:24 +*/ +public interface ChineseEnglishWordsService extends IService { + +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java b/src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java deleted file mode 100644 index 87916fa..0000000 --- a/src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.supervision.pdfqaserver.service; - -import com.supervision.pdfqaserver.dto.DocumentDTO; -import com.supervision.pdfqaserver.dto.TruncateDTO; - -import java.util.List; - -/** - * 文档切分器 - */ -public interface DocumentSlicer { - - /** - * 切分文档 - * @param documents 文档列表 - * @return - */ - List slice(List documents); -} diff --git a/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java b/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java new file mode 100644 index 0000000..21be737 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java @@ -0,0 +1,18 @@ +package com.supervision.pdfqaserver.service; + +import com.supervision.pdfqaserver.domain.DocumentTruncation; +import com.baomidou.mybatisplus.extension.service.IService; +import com.supervision.pdfqaserver.dto.TruncateDTO; + +import java.util.List; + +/** +* @author Administrator +* @description 针对表【document_truncation(文档切分表)】的数据库操作Service +* @createDate 2025-04-27 11:45:24 +*/ +public 
/**
 * Knowledge graph service.
 */
public interface KnowledgeGraphService {


    /**
     * Generates the knowledge graph for a document.
     *
     * @param documentId document id
     */
    void generateGraph(String documentId);

    /**
     * NOTE(review): undocumented in the original. Presumably runs {@code query} against the
     * graph identified by {@code databaseId}; the method returns nothing - confirm the
     * intended contract.
     *
     * @param databaseId presumably the target graph database - confirm
     * @param query      presumably the query to answer - confirm
     */
    void queryGraph(String databaseId, String query);


    /**
     * NOTE(review): undocumented in the original. Presumably persists the extraction result
     * for the given truncation - confirm.
     *
     * @param eredto       entity-relation extraction result
     * @param truncationId id of the truncation the result belongs to
     */
    void saveERE(EREDTO eredto, String truncationId);
}
java.util.List; + +/** +* @author Administrator +* @description 针对表【pdf_analysis_output】的数据库操作Service +* @createDate 2025-04-27 11:45:24 +*/ +public interface PdfAnalysisOutputService extends IService { + + List queryByPdfId(String pdfId); +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java b/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java new file mode 100644 index 0000000..e5ef730 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java @@ -0,0 +1,13 @@ +package com.supervision.pdfqaserver.service; + +import com.supervision.pdfqaserver.domain.PdfInfo; +import com.baomidou.mybatisplus.extension.service.IService; + +/** +* @author Administrator +* @description 针对表【pdf_info(pdf信息)】的数据库操作Service +* @createDate 2025-04-27 11:45:24 +*/ +public interface PdfInfoService extends IService { + +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java new file mode 100644 index 0000000..094f16f --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java @@ -0,0 +1,36 @@ +package com.supervision.pdfqaserver.service; + +import com.supervision.pdfqaserver.dto.EREDTO; +import com.supervision.pdfqaserver.dto.DocumentDTO; +import com.supervision.pdfqaserver.dto.TruncateDTO; + +import java.util.List; + +/** + * 三元组转换管道 + */ +public interface TripleConversionPipeline { + + /** + * 切分文档 + * @param documents 文档列表 + * @return + */ + List sliceDocuments(List documents); + + + /** + * 实体关系抽取 + * @param truncateDTO 切分文档 + * @return + */ + EREDTO doEre(TruncateDTO truncateDTO); + + /** + * 合并实体关系抽取结果 + * @param eredtoList 实体关系抽取结果列表 + * @return + */ + List mergeEreResults(List eredtoList); + +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/TripleToCypherExecutor.java 
/**
 * Executor that converts triples into Cypher statements and runs them.
 */
public interface TripleToCypherExecutor {

    /**
     * Generates a Cypher statement for inserting an extraction result.
     *
     * @param eredto entity-relation extraction result
     * @return the Cypher statement
     */
    String generateInsertCypher(EREDTO eredto);


    /**
     * Generates a Cypher query statement.
     *
     * @param query input the query statement is generated from
     * @return the Cypher statement
     */
    String generateQueryCypher(String query);

    /**
     * Executes a Cypher statement.
     *
     * @param cypher statement to execute
     */
    void executeCypher(String cypher);
}
com.baomidou.mybatisplus.extension.service.IService; + +/** +* @author Administrator +* @description 针对表【truncation_er_attribute(实体表)】的数据库操作Service +* @createDate 2025-04-27 11:45:24 +*/ +public interface TruncationErAttributeService extends IService { + +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java b/src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java new file mode 100644 index 0000000..666e7b7 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java @@ -0,0 +1,13 @@ +package com.supervision.pdfqaserver.service; + +import com.supervision.pdfqaserver.domain.TruncationRelationExtraction; +import com.baomidou.mybatisplus.extension.service.IService; + +/** +* @author Administrator +* @description 针对表【truncation_relation_extraction(片段关系抽取)】的数据库操作Service +* @createDate 2025-04-27 11:45:24 +*/ +public interface TruncationRelationExtractionService extends IService { + +} diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java new file mode 100644 index 0000000..57106ef --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java @@ -0,0 +1,22 @@ +package com.supervision.pdfqaserver.service.impl; + +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import com.supervision.pdfqaserver.domain.ChineseEnglishWords; +import com.supervision.pdfqaserver.service.ChineseEnglishWordsService; +import com.supervision.pdfqaserver.mapper.ChineseEnglishWordsMapper; +import org.springframework.stereotype.Service; + +/** +* @author Administrator +* @description 针对表【chinese_english_words(中英文对照字典)】的数据库操作Service实现 +* @createDate 2025-04-27 11:45:24 +*/ +@Service +public class ChineseEnglishWordsServiceImpl extends ServiceImpl + 
implements ChineseEnglishWordsService{ + +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java new file mode 100644 index 0000000..390b2f4 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java @@ -0,0 +1,33 @@ +package com.supervision.pdfqaserver.service.impl; + +import cn.hutool.core.collection.CollUtil; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import com.supervision.pdfqaserver.domain.DocumentTruncation; +import com.supervision.pdfqaserver.dto.TruncateDTO; +import com.supervision.pdfqaserver.service.DocumentTruncationService; +import com.supervision.pdfqaserver.mapper.DocumentTruncationMapper; +import org.springframework.stereotype.Service; + +import java.util.List; + +/** +* @author Administrator +* @description 针对表【document_truncation(文档切分表)】的数据库操作Service实现 +* @createDate 2025-04-27 11:45:24 +*/ +@Service +public class DocumentTruncationServiceImpl extends ServiceImpl + implements DocumentTruncationService{ + + @Override + public void batchSave(List truncateDTOS) { + if (CollUtil.isEmpty(truncateDTOS)){ + return; + } + truncateDTOS.stream().map(TruncateDTO::toDocumentTruncation).forEach(this::save); + } +} + + + + diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java new file mode 100644 index 0000000..d246477 --- /dev/null +++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java @@ -0,0 +1,22 @@ +package com.supervision.pdfqaserver.service.impl; + +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import com.supervision.pdfqaserver.domain.DomainMetadata; +import com.supervision.pdfqaserver.service.DomainMetadataService; +import 
com.supervision.pdfqaserver.mapper.DomainMetadataMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* @author Administrator
+* @description Database-operation Service implementation for table domain_metadata (domain metadata)
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class DomainMetadataServiceImpl extends ServiceImpl<DomainMetadataMapper, DomainMetadata>
+    implements DomainMetadataService{
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java
new file mode 100644
index 0000000..06364c7
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java
@@ -0,0 +1,111 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.collection.CollUtil;
+import com.supervision.pdfqaserver.dto.EREDTO;
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
+import com.supervision.pdfqaserver.dto.DocumentDTO;
+import com.supervision.pdfqaserver.dto.TruncateDTO;
+import com.supervision.pdfqaserver.service.*;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Drives knowledge-graph generation for a PDF; slicing, extraction, merging and Cypher
+ * execution are delegated to the injected pipeline and executor.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
+
+    private final TripleConversionPipeline tripleConversionPipeline;
+
+    private final TripleToCypherExecutor tripleToCypherExecutor;
+
+    private final ChineseEnglishWordsService chineseEnglishWordsService;
+
+    private final DocumentTruncationService documentTruncationService;
+
+    private final DomainMetadataService domainMetadataService;
+
+    private final PdfAnalysisOutputService pdfAnalysisOutputService;
+
+    private final PdfInfoService pdfInfoService;
+
+    private final TruncationEntityExtractionService truncationEntityExtractionService;
+
+    private final TruncationRelationExtractionService relationExtractionService;
+
+    private final TruncationErAttributeService truncationErAttributeService;
+
+    /**
+     * Builds the knowledge graph for one PDF: loads its analysis output, slices it, stores the
+     * slices, runs entity-relation extraction per slice, merges the results and executes one
+     * insert Cypher statement per merged result.
+     *
+     * @param documentId pdfId used to look up the analysis output
+     */
+    @Override
+    public void generateGraph(String documentId) {
+        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(documentId);
+        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
+            log.info("没有找到pdfId为{}的pdf分析结果", documentId);
+            return;
+        }
+        List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).toList();
+        // Slice the document
+        List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
+        if (CollUtil.isEmpty(truncateDTOS)) {
+            // Guard: looping over a null slice list below would throw a NullPointerException
+            log.info("generateGraph: no slices produced for pdfId {}", documentId);
+            return;
+        }
+        // Persist the slices
+        documentTruncationService.batchSave(truncateDTOS);
+
+        // Run entity-relation extraction on every slice
+        List<EREDTO> eredtoList = new ArrayList<>();
+        for (TruncateDTO truncateDTO : truncateDTOS) {
+            EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO);
+            if (eredto == null) {
+                // No extraction result for this slice: nothing to save or merge
+                continue;
+            }
+            // Persist the extraction result
+            this.saveERE(eredto, truncateDTO.getId());
+            // Collect it so the merge step below receives every result
+            eredtoList.add(eredto);
+        }
+
+        // Merge the extraction results
+        List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
+        if (CollUtil.isEmpty(mergedList)) {
+            return;
+        }
+        for (EREDTO eredto : mergedList) {
+            String insertCypher = tripleToCypherExecutor.generateInsertCypher(eredto);
+            tripleToCypherExecutor.executeCypher(insertCypher);
+        }
+    }
+
+    /**
+     * Not implemented yet: the body is empty.
+     */
+    @Override
+    public void queryGraph(String databaseId, String query) {
+
+    }
+
+    /**
+     * Not implemented yet: the body is empty, so nothing is stored.
+     */
+    @Override
+    public void saveERE(EREDTO eredto, String truncationId) {
+
+    }
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java
new file mode 100644
index 0000000..c08f7e6
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java
@@ -0,0 +1,31 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.lang.Assert;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
+import com.supervision.pdfqaserver.service.PdfAnalysisOutputService;
+import com.supervision.pdfqaserver.mapper.PdfAnalysisOutputMapper;
+import
org.springframework.stereotype.Service;
+
+import java.util.List;
+
+/**
+* @author Administrator
+* @description Database-operation Service implementation for table pdf_analysis_output
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class PdfAnalysisOutputServiceImpl extends ServiceImpl<PdfAnalysisOutputMapper, PdfAnalysisOutput>
+    implements PdfAnalysisOutputService{
+
+    /**
+     * Lists the rows whose pdfId equals the argument; an empty pdfId fails the Assert.notEmpty check.
+     */
+    @Override
+    public List<PdfAnalysisOutput> queryByPdfId(String pdfId) {
+        Assert.notEmpty(pdfId, "pdfId不能为空");
+        return super.lambdaQuery().eq(PdfAnalysisOutput::getPdfId, pdfId).list();
+    }
+}
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java
new file mode 100644
index 0000000..600d857
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.PdfInfo;
+import com.supervision.pdfqaserver.service.PdfInfoService;
+import com.supervision.pdfqaserver.mapper.PdfInfoMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* @author Administrator
+* @description Database-operation Service implementation for table pdf_info (pdf information)
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class PdfInfoServiceImpl extends ServiceImpl<PdfInfoMapper, PdfInfo>
+    implements PdfInfoService{
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
new file mode 100644
index 0000000..57899ca
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
@@ -0,0 +1,105 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.StrUtil;
+import com.supervision.pdfqaserver.cache.PromptCache;
+import com.supervision.pdfqaserver.dto.*;
+import
com.supervision.pdfqaserver.service.TripleConversionPipeline;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.ollama.OllamaChatModel;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Prompts the Ollama chat model to extract entities and relations from document slices.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class TripleConversionPipelineImpl implements TripleConversionPipeline {
+
+    private final OllamaChatModel ollamaChatModel;
+
+    /**
+     * Puts the documents into reading order. Cutting them into slices is not implemented yet.
+     *
+     * @param documents parsed document blocks; null or empty is accepted
+     * @return currently always an empty list, never null
+     */
+    @Override
+    public List<TruncateDTO> sliceDocuments(List<DocumentDTO> documents) {
+        if (CollUtil.isEmpty(documents)) {
+            return new ArrayList<>();
+        }
+        // Reading order: page number first, layout order within a page second
+        List<DocumentDTO> ordered = documents.stream()
+                .sorted(Comparator.comparing(DocumentDTO::getPageNo).thenComparing(DocumentDTO::getLayoutOrder))
+                .toList();
+        // TODO: cut `ordered` into TruncateDTO slices; until then there is nothing to return
+        return new ArrayList<>();
+    }
+
+    /**
+     * Runs entity-relation extraction on one slice, picking the prompt by layout type.
+     *
+     * @param truncateDTO slice to analyse; layout type "0" is treated as text, "1" as table
+     * @return the extraction result, or null when the layout type is neither "0" nor "1"
+     */
+    @Override
+    public EREDTO doEre(TruncateDTO truncateDTO) {
+        if (StrUtil.equals(truncateDTO.getLayoutType(), "0")) {
+            return doTextEre(truncateDTO);
+        }
+        if (StrUtil.equals(truncateDTO.getLayoutType(), "1")) {
+            return doTableEre(truncateDTO);
+        }
+        log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
+        return null;
+    }
+
+    /** Text slice: fills the DOERE_TEXT prompt and parses the model reply with EREDTO.fromTextJson. */
+    private EREDTO doTextEre(TruncateDTO truncateDTO) {
+        String response = askModel(PromptCache.promptMap.get(PromptCache.DOERE_TEXT), truncateDTO);
+        // todo: abnormal model replies are not handled yet
+        return EREDTO.fromTextJson(response, truncateDTO.getId());
+    }
+
+    /** Table slice: fills the DOERE_TABLE prompt and parses the model reply with EREDTO.fromTableJson. */
+    private EREDTO doTableEre(TruncateDTO truncateDTO) {
+        String response = askModel(PromptCache.promptMap.get(PromptCache.DOERE_TABLE), truncateDTO);
+        // todo: abnormal model replies are not handled yet
+        return EREDTO.fromTableJson(response, truncateDTO.getId());
+    }
+
+    /** Formats the prompt template with the slice content and returns the model's raw reply. */
+    private String askModel(String template, TruncateDTO truncateDTO) {
+        if (template == null) {
+            throw new IllegalStateException("prompt template is missing from PromptCache");
+        }
+        return ollamaChatModel.call(String.format(template, truncateDTO.getContent()));
+    }
+
+    /**
+     * Merges extraction results, mainly the attributes of matching entities and relations.
+     * The merge itself is not implemented yet, so the returned list is currently always empty.
+     *
+     * @param eredtoList extraction results to merge
+     * @return the merged results, never null
+     */
+    @Override
+    public List<EREDTO> mergeEreResults(List<EREDTO> eredtoList) {
+        List<EREDTO> merged = new ArrayList<>();
+        if (CollUtil.isEmpty(eredtoList)){
+            return merged;
+        }
+
+        // TODO: fill `merged`. Identity rules noted by the original author:
+        //  - entities: equal getEntity() and getName() mean the same entity
+        //  - relations: equal source, target and relation mean the same relation
+        return merged;
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java
new file mode 100644
index 0000000..1d82afa
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java
@@ -0,0 +1,37 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.supervision.pdfqaserver.dto.EREDTO;
+import com.supervision.pdfqaserver.service.TripleToCypherExecutor;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.ollama.OllamaChatModel;
+import org.springframework.stereotype.Service;
+
+/**
+ * Placeholder implementation: none of the three operations is implemented yet.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
+
+    private final OllamaChatModel ollamaChatModel;
+
+    /** Not implemented yet; always returns null. */
+    @Override
+    public String generateInsertCypher(EREDTO eredto) {
+        return null;
+    }
+
+    /** Not implemented yet; always returns null. */
+    @Override
+    public String generateQueryCypher(String query) {
+        return null;
+    }
+
+    /** Not implemented yet; the body is empty. */
+    @Override
+    public void executeCypher(String cypher) {
+
+    }
+}
diff --git
a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java
new file mode 100644
index 0000000..aafd16f
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
+import com.supervision.pdfqaserver.service.TruncationEntityExtractionService;
+import com.supervision.pdfqaserver.mapper.TruncationEntityExtractionMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* @author Administrator
+* @description Database-operation Service implementation for table truncation_entity_extraction (slice entity extraction)
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class TruncationEntityExtractionServiceImpl extends ServiceImpl<TruncationEntityExtractionMapper, TruncationEntityExtraction>
+    implements TruncationEntityExtractionService{
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java
new file mode 100644
index 0000000..3de845c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.TruncationErAttribute;
+import com.supervision.pdfqaserver.service.TruncationErAttributeService;
+import com.supervision.pdfqaserver.mapper.TruncationErAttributeMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* @author Administrator
+* @description Database-operation Service implementation for table truncation_er_attribute (entity table)
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class
TruncationErAttributeServiceImpl extends ServiceImpl<TruncationErAttributeMapper, TruncationErAttribute>
+    implements TruncationErAttributeService{
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java
new file mode 100644
index 0000000..5ab6692
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
+import com.supervision.pdfqaserver.service.TruncationRelationExtractionService;
+import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* @author Administrator
+* @description Database-operation Service implementation for table truncation_relation_extraction (slice relation extraction)
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class TruncationRelationExtractionServiceImpl extends ServiceImpl<TruncationRelationExtractionMapper, TruncationRelationExtraction>
+    implements TruncationRelationExtractionService{
+
+}
+
+
+
+
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index e66b0ba..17e179f 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,6 +1,16 @@ spring: application: name: pdf-qa-server + datasource: + druid: + url: jdbc:postgresql://192.168.10.137:54321/pdf-qa + username: postgres + password: 123456 + driver-class-name: org.postgresql.Driver + servlet: + multipart: + max-file-size: 10MB + max-request-size: 100MB ai: ollama: baseUrl: http://192.168.10.70:11434 diff --git a/src/main/resources/mapper/ChineseEnglishWordsMapper.xml b/src/main/resources/mapper/ChineseEnglishWordsMapper.xml new file mode 100644 index 0000000..5b71804 --- /dev/null +++ b/src/main/resources/mapper/ChineseEnglishWordsMapper.xml @@ -0,0 +1,18 @@
+ + + + + + + + + + + + + chinese_word,english_word,create_time, + update_time + + diff --git a/src/main/resources/mapper/DocumentTruncationMapper.xml b/src/main/resources/mapper/DocumentTruncationMapper.xml new file mode 100644 index 0000000..2440ce9 --- /dev/null +++ b/src/main/resources/mapper/DocumentTruncationMapper.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + id,document_id,section_id,title, + layout_type,content,create_time, + update_time + + diff --git a/src/main/resources/mapper/DomainMetadataMapper.xml b/src/main/resources/mapper/DomainMetadataMapper.xml new file mode 100644 index 0000000..3f1d122 --- /dev/null +++ b/src/main/resources/mapper/DomainMetadataMapper.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + id,domain_type,source_type, + relation,target_type,generation_type, + create_time,update_time + + diff --git a/src/main/resources/mapper/PdfAnalysisOutputMapper.xml b/src/main/resources/mapper/PdfAnalysisOutputMapper.xml new file mode 100644 index 0000000..87a9397 --- /dev/null +++ b/src/main/resources/mapper/PdfAnalysisOutputMapper.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + id,layout_type,content, + page_no,pdf_id,table_title, + order,create_time + + diff --git a/src/main/resources/mapper/PdfInfoMapper.xml b/src/main/resources/mapper/PdfInfoMapper.xml new file mode 100644 index 0000000..aa23b3e --- /dev/null +++ b/src/main/resources/mapper/PdfInfoMapper.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + id,path,filename, + create_time + + diff --git a/src/main/resources/mapper/TruncationEntityExtractionMapper.xml b/src/main/resources/mapper/TruncationEntityExtractionMapper.xml new file mode 100644 index 0000000..0084953 --- /dev/null +++ b/src/main/resources/mapper/TruncationEntityExtractionMapper.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + id,truncation_id,entity, + name,create_time,update_time + + diff --git a/src/main/resources/mapper/TruncationErAttributeMapper.xml 
b/src/main/resources/mapper/TruncationErAttributeMapper.xml new file mode 100644 index 0000000..894cebe --- /dev/null +++ b/src/main/resources/mapper/TruncationErAttributeMapper.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + id,ter_id,type, + attribute,value,data_type, + create_time,update_time + + diff --git a/src/main/resources/mapper/TruncationRelationExtractionMapper.xml b/src/main/resources/mapper/TruncationRelationExtractionMapper.xml new file mode 100644 index 0000000..e779946 --- /dev/null +++ b/src/main/resources/mapper/TruncationRelationExtractionMapper.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + id,truncation_id,source, + source_type,target,target_type, + relation,create_time,update_time + +