diff --git a/pom.xml b/pom.xml
index 40897ad..34ad1f6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,15 +23,32 @@
org.springframework.boot
spring-boot-starter-web
+
+ org.springframework.boot
+ spring-boot-starter-aop
+
org.springframework.ai
spring-ai-starter-model-ollama
-
+
+ com.alibaba
+ druid-spring-boot-3-starter
+ 1.2.21
+
+
+ com.baomidou
+ mybatis-plus-spring-boot3-starter
+ 3.5.7
+
+
+ com.baomidou
+ mybatis-plus-boot-starter
+ 3.5.5
+
org.postgresql
postgresql
- runtime
org.projectlombok
@@ -48,6 +65,21 @@
hutool-all
5.8.26
+
+ com.alibaba
+ fastjson
+ 1.2.83_noneautotype
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ 2.15.3
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.15.3
+
@@ -61,33 +93,4 @@
-
-
-
-
-
-
diff --git a/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java b/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java
index eafc706..bd11d75 100644
--- a/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java
+++ b/src/main/java/com/supervision/pdfqaserver/PdfQaServerApplication.java
@@ -1,9 +1,10 @@
package com.supervision.pdfqaserver;
+import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
-
+@MapperScan(basePackages = {"com.supervision.pdfqaserver.mapper"})
@SpringBootApplication
public class PdfQaServerApplication {
diff --git a/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java
new file mode 100644
index 0000000..25e2613
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/cache/PromptCache.java
@@ -0,0 +1,185 @@
+package com.supervision.pdfqaserver.cache;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Prompt cache: static registry of the LLM prompt templates, looked up by key in {@link #promptMap}.
+ */
+public class PromptCache {
+
+ public static final String DOERE_TEXT = "DOERE_TEXT"; // key of the prompt used for plain-text fragments
+ public static final String DOERE_TABLE = "DOERE_TABLE"; // key of the prompt used for table fragments
+ public static final Map promptMap = new HashMap<>(); // prompt key -> prompt template
+
+ static {
+ init(); // safe before the declarations below: the prompt text blocks are compile-time constants
+ }
+ private static void init(){ // registers every built-in prompt under its key
+ promptMap.put(DOERE_TEXT, DOERE_TEXT_PROMPT);
+ promptMap.put(DOERE_TABLE, DOERE_TABLE_PROMPT);
+ }
+
+
+
+ private static final String DOERE_TEXT_PROMPT = """
+ 你是一个高级信息抽取引擎,请从给定文本中提取以下结构化信息并以JSON格式输出:
+
+ 1. **节点提取**:
+ - 识别所有实体作为节点
+ - 自动推断每个节点的类型
+ - 记录节点的所有相关属性(键值对形式)
+
+ 2. **关系提取**:
+ - 识别所有节点间的关系
+ - 自动推断关系类型
+ - 记录关系的所有相关属性(键值对形式)
+
+ 3. **类型化三元组**:
+ - 生成由 (头节点类型, 关系类型, 尾节点类型) 组成的元组
+
+ **输出要求**:
+ - 使用如下JSON Schema:
+
+ {
+ "nodes": [
+ {
+ "name": "节点名称",
+ "type": "节点类型",
+ "attributes": {
+ "属性名1": "属性值1",
+ "属性名2": "属性值2"
+ }
+ }
+ ],
+ "relations": [
+ {
+ "source": "头节点名称",
+ "target": "尾节点名称",
+ "type": "关系类型",
+ "attributes": {
+ "关系属性名1": "关系属性值1"
+ }
+ }
+ ],
+ "typed_triplets": [
+ ["头节点类型", "关系类型", "尾节点类型"]
+ ]
+ }
+
+
+ **处理规则**:
+ 1. 节点类型和关系类型由你根据上下文语义自动创建(如"科学家"/"发明"/"研究所")
+ 2. 属性字段应包含文本中明确提及或可推导的特征(如数值、时间、状态等)
+ 3. 对同一实体的不同指代需进行合并(如"特斯拉"和"埃隆·马斯克的公司")
+
+ **示例文本**:
+ "爱因斯坦在1905年发表了狭义相对论论文,这篇革命性理论后来被普林斯顿高等研究院深入研究"
+
+ **期望输出**:
+
+ {
+ "nodes": [
+ {
+ "name": "爱因斯坦",
+ "type": "物理学家",
+ "attributes": {
+ "领域": "理论物理"
+ }
+ },
+ {
+ "name": "狭义相对论",
+ "type": "科学理论",
+ "attributes": {
+ "发表年份": 1905,
+ "重要性": "革命性"
+ }
+ },
+ {
+ "name": "普林斯顿高等研究院",
+ "type": "科研机构",
+ "attributes": {
+ "研究领域": "理论科学"
+ }
+ }
+ ],
+ "relations": [
+ {
+ "source": "爱因斯坦",
+ "target": "狭义相对论",
+ "type": "发表",
+ "attributes": {
+ "时间": 1905
+ }
+ },
+ {
+ "source": "普林斯顿高等研究院",
+ "target": "狭义相对论",
+ "type": "研究",
+ "attributes": {
+ "强度描述": "深入"
+ }
+ }
+ ],
+ "typed_triplets": [
+ ["物理学家", "发表", "科学理论"],
+ ["科研机构", "研究", "科学理论"]
+ ]
+ }
+
+ 请处理以下文本:
+ {}
+ """;
+
+ private static final String DOERE_TABLE_PROMPT = """
+ 你是一个表格数据处理专家,请严格按以下要求从给出的表格中提取数据:
+
+ **处理规则:**
+ 1. 完全保留原始表头字段名称,不做任何中英文转换或修改
+ 2. 将每行数据转换为一个独立对象
+ 3. 所有数值保留原始格式(包括逗号分隔符和小数点)
+ 4. 表格第一列作为主键字段
+
+ **输出格式:**
+ ```json
+ {
+ "table_data": [
+ {
+ "[第一列表头]": "[第一列值]",
+ "[第二列表头]": "[第二列值]",
+ "[第三列表头]": "[第三列值]"
+ },
+ // 后续行...
+ ]
+ }
+ ```
+
+ **示例表格:**
+ | 账龄 | 期末余额 | 年初余额 |
+ | --- | --- | --- |
+ | 1年以内 | 310,844,201.27 | 337,641,834.84 |
+ | 1至2年 | 52,374,904.35 | 15,041,750.36 |
+
+ **期望输出:**
+
+ {
+ "table_data": [
+ {
+ "账龄": "1年以内",
+ "期末余额": "310,844,201.27",
+ "年初余额": "337,641,834.84"
+ },
+ {
+ "账龄": "1至2年",
+ "期末余额": "52,374,904.35",
+ "年初余额": "15,041,750.36"
+ }
+ ]
+ }
+
+ 请处理以下表格:
+ {}
+ """;
+
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java b/src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java
new file mode 100644
index 0000000..47d2be2
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/config/ExceptionHandlerConfig.java
@@ -0,0 +1,63 @@
+package com.supervision.pdfqaserver.config;
+
+import com.supervision.pdfqaserver.constant.ResultStatusEnum;
+import com.supervision.pdfqaserver.dto.R;
+import com.supervision.pdfqaserver.exception.BusinessException;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.web.bind.annotation.ExceptionHandler;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.bind.annotation.RestControllerAdvice;
+import org.springframework.web.multipart.MaxUploadSizeExceededException;
+
+/**
+ * Unified exception handler configuration: turns exceptions thrown by REST controllers into R responses.
+ * basePackages is matched as a plain package-name prefix (no wildcards), so it names the real controller package.
+ * @author wb
+ * @date 2022/3/10 13:24
+ */
+@Slf4j
+@Configuration
+@RestControllerAdvice(annotations = RestController.class, basePackages = {"com.supervision.pdfqaserver.controller"})
+public class ExceptionHandlerConfig {
+
+    /**
+     * Handles exceptions raised by manual argument validation.
+     *
+     * @param exception the argument validation exception
+     * @return failure response carrying the ILLEGAL_ARGUMENT code and the exception message
+     */
+    @ExceptionHandler(IllegalArgumentException.class)
+    public R<?> manualValidationExceptionResponse(IllegalArgumentException exception) {
+        log.error("=========手动校验参数异常=========>>>");
+        log.error(exception.getMessage(), exception);
+        log.error("<<<=========手动校验参数异常=========");
+        return R.fail(ResultStatusEnum.ILLEGAL_ARGUMENT.getCode(), exception.getMessage());
+    }
+
+    @ExceptionHandler(BusinessException.class)
+    public R<?> businessExceptionResponse(BusinessException exception) {
+        log.error("=========运行异常=========>>>");
+        log.error(exception.getMessage(), exception);
+        log.error("<<<=========运行异常=========");
+        // business failures always answer with the fixed code 511 and the exception message
+        return R.fail(511, exception.getMessage());
+    }
+
+    @ExceptionHandler(RuntimeException.class)
+    public R<?> manualValidationExceptionResponse(RuntimeException exception) {
+        log.error("=========运行异常=========>>>");
+        log.error(exception.getMessage(), exception);
+        log.error("<<<=========运行异常=========");
+        // fallback for every other runtime exception
+        return R.fail(ResultStatusEnum.RUNTIME_EXCEPTION.getCode(), exception.getMessage());
+    }
+
+    @ExceptionHandler(MaxUploadSizeExceededException.class)
+    public R<?> handleMaxSizeException(MaxUploadSizeExceededException exception) {
+        log.error("=========文件大小超出限制异常=========>>>");
+        log.error(exception.getMessage(), exception);
+        log.error("<<<=========文件大小超出限制异常=========");
+        return R.fail(ResultStatusEnum.EXCEED_FILE_SIZE.getCode(), exception.getMessage());
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java b/src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java
new file mode 100644
index 0000000..d192a9c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/config/MyMetaObjectHandler.java
@@ -0,0 +1,26 @@
+package com.supervision.pdfqaserver.config;
+
+import com.baomidou.mybatisplus.core.handlers.MetaObjectHandler;
+import org.apache.ibatis.reflection.MetaObject;
+
+import java.time.LocalDateTime;
+
+/**
+ * Fills the createTime/updateTime audit fields when a row is inserted or updated.
+ * @author Ray
+ */
+public class MyMetaObjectHandler implements MetaObjectHandler {
+
+    @Override
+    public void insertFill(MetaObject metaObject) {
+        LocalDateTime now = LocalDateTime.now(); // one timestamp, so createTime equals updateTime on a new row
+        this.setFieldValByName("createTime", now, metaObject);
+        this.setFieldValByName("updateTime", now, metaObject);
+    }
+
+    @Override
+    public void updateFill(MetaObject metaObject) {
+        this.setFieldValByName("updateTime", LocalDateTime.now(), metaObject);
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java b/src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java
new file mode 100644
index 0000000..b72981c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/config/MybatisPlusConfig.java
@@ -0,0 +1,43 @@
+package com.supervision.pdfqaserver.config;
+
+import com.baomidou.mybatisplus.annotation.DbType;
+import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor;
+import com.baomidou.mybatisplus.extension.plugins.inner.PaginationInnerInterceptor;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * MyBatis-Plus configuration
+ *
+ * @author qmy
+ * @version 1.0.0 2020/10/22 9:47
+ * @since JDK1.8
+ */
+@Configuration
+public class MybatisPlusConfig {
+
+    /**
+     * Registers the meta-object handler as a bean.
+     */
+    @Bean
+    public MyMetaObjectHandler myMetaObjectHandler() {
+        return new MyMetaObjectHandler();
+    }
+
+    /**
+     * Interceptor configuration: the chain holds a single pagination inner interceptor.
+     *
+     * @return the configured MyBatis-Plus interceptor
+     */
+    @Bean
+    public MybatisPlusInterceptor mybatisPlusInterceptor() {
+        PaginationInnerInterceptor pagination = new PaginationInnerInterceptor();
+        pagination.setOverflow(false);
+        // Note: the database type has to be set here.
+        pagination.setDbType(DbType.POSTGRE_SQL);
+
+        MybatisPlusInterceptor chain = new MybatisPlusInterceptor();
+        chain.addInnerInterceptor(pagination);
+        return chain;
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java b/src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java
new file mode 100644
index 0000000..471ef1e
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/constant/ResultStatusEnum.java
@@ -0,0 +1,55 @@
+package com.supervision.pdfqaserver.constant;
+
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+
+/**
+ * Response result status enum: pairs a numeric status code with its user-facing message
+ * @author qimaoyu
+ * @create 2019-07-14 10:22
+ */
+@NoArgsConstructor
+@AllArgsConstructor
+public enum ResultStatusEnum {
+
+ AUTHENTICATION_FAILED(320, "token失效,请重新登录!"),
+ NO_ACCESS_TO_THIS_INTERFACE(320, "无权访问此接口!"), // NOTE(review): same code 320 as AUTHENTICATION_FAILED — confirm this is intended
+ FAILED_TO_GENERATE_TOKEN(321, "生成token失败!"),
+ ACCOUNT_PASSWORD_INCORRECT(322, "账号或密码错误!"),
+ ACCOUNT_NOT_CREATE(323, "账号未创建!"),
+ HAS_BEEN_PULLED_BLACK(324, "已被删除或禁用,无法登录!"),
+ USERNAME_MAIL_IS_EXIST(341, "登录名称已经被注册!"),
+ USERNAME_IS_BLANK(342, "登录名称为空!"),
+ VERIFICATION_CODE_EXPIRED(350,"验证码已过期,请重新获取。"),
+ VERIFICATION_CODE_FAILURE(351,"验证码输入错误。"),
+ OPERATE_FAIL(360,"修改毕业生信息失败。"),
+ DATA_IS_EMPTY(370,"查询到的结果为空"),
+ SYSTEM_ABNORMAL(500, "系统繁忙,请稍后重试!"),
+ UPLOAD_EXCEPTION(501, "文件上传异常!"),
+ EXPORT_EXCEPTION(502, "文件导出异常!"),
+ INCORRECT_FILE_FORMAT(503, "文件格式不正确!"),
+ PARAMETER_CANNOT_BE_EMPTY(504, "参数不能为空,操作失败!"),
+ NO_TEMP_UPLOADFILEPATH(505,"未配置文件上传临时存储路径"),
+ USER_DOES_NOT_EXIST(507, "用户不存在,操作失败!"),
+
+ ILLEGAL_ARGUMENT(508, "参数校验失败!"),
+ RUNTIME_EXCEPTION(509, "程序运行异常!"),
+ EXCEED_FILE_SIZE(510, "文件大小超出限制!"),
+ IMPORT_COMPANY_FORMAT_ERROR(521,"Excel表格格式错误!"),
+ IMPORT_COMPANY_FAIL(522,"部分数据导入失败"),
+ INSERT_FAIL(600,"新增失败"),
+ DuplicateKeyException(601,"该条信息已经存在,请勿重复添加"),
+ UPDATE_FAIL(700,"更新失败"),
+ DELETE_FAIL(800,"删除失败"),
+ YEAR_IS_CLOSE(1001,"该年度暂未开启");
+
+ @Getter
+ @Setter // NOTE(review): a setter on an enum field mutates the shared constant for every caller — confirm it is needed
+ private int code;
+
+ @Getter
+ @Setter // NOTE(review): same concern as for code — confirm it is needed
+ private String message;
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java b/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java
index 3af4fd9..598fdab 100644
--- a/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java
+++ b/src/main/java/com/supervision/pdfqaserver/controller/ChatController.java
@@ -23,6 +23,11 @@ public class ChatController {
private final OllamaChatModel ollamaChatModel;
+ /**
+ * For debugging only; this endpoint will be removed later
+ * @param message
+ * @return
+ */
@PostMapping("/chat")
public R pageList(@RequestBody Map message) {
List messages = new ArrayList<>();
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java b/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java
new file mode 100644
index 0000000..6d11b7c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/ChineseEnglishWords.java
@@ -0,0 +1,41 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Chinese-English dictionary entry
+ * @TableName chinese_english_words
+ */
+@TableName(value ="chinese_english_words")
+@Data
+public class ChineseEnglishWords implements Serializable {
+ /**
+ * Chinese word (primary key)
+ */
+ @TableId
+ private String chineseWord;
+
+ /**
+ * English word
+ */
+ private String englishWord;
+
+ /**
+ * Creation time, filled on insert
+ */
+ @TableField(fill = FieldFill.INSERT)
+ private LocalDateTime createTime;
+
+ /**
+ * Update time, filled on insert and update
+ */
+ @TableField(fill = FieldFill.INSERT_UPDATE)
+ private LocalDateTime updateTime;
+
+ @TableField(exist = false)
+ private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java b/src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java
new file mode 100644
index 0000000..4145bc2
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/DocumentTruncation.java
@@ -0,0 +1,58 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Document truncation (fragment) table
+ * @TableName document_truncation
+ */
+@TableName(value ="document_truncation")
+@Data
+public class DocumentTruncation implements Serializable {
+ /**
+ * Primary key
+ */
+ @TableId
+ private String id;
+
+ /**
+ * Document id (id of the pdf_info table)
+ */
+ private Integer documentId;
+
+ /**
+ * Section id (id of the pdf_analysis_output table)
+ */
+ private String sectionId;
+
+ /**
+ * Layout type: 0 = text, 1 = table
+ */
+ private String layoutType;
+
+ private String title;
+
+ /**
+ * Fragment content
+ */
+ private String content;
+
+ /**
+ * Creation time, filled on insert
+ */
+ @TableField(fill = FieldFill.INSERT)
+ private LocalDateTime createTime;
+
+ /**
+ * Update time, filled on insert and update
+ */
+ @TableField(fill = FieldFill.INSERT_UPDATE)
+ private LocalDateTime updateTime;
+
+ @TableField(exist = false)
+ private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java b/src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java
new file mode 100644
index 0000000..91695fd
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/DomainMetadata.java
@@ -0,0 +1,60 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Domain metadata: one typed triplet (head node type, relation, tail node type) of a domain
+ * @TableName domain_metadata
+ */
+@TableName(value ="domain_metadata")
+@Data
+public class DomainMetadata implements Serializable {
+ /**
+ * Primary key
+ */
+ @TableId
+ private String id;
+
+ /**
+ * Domain type
+ */
+ private String domainType;
+
+ /**
+ * Head node type
+ */
+ private String sourceType;
+
+ /**
+ * Relation
+ */
+ private String relation;
+
+ /**
+ * Tail node type
+ */
+ private String targetType;
+
+ /**
+ * Data source: 0 = entered manually, 1 = generated by the system
+ */
+ private String generationType;
+
+ /**
+ * Creation time, filled on insert
+ */
+ @TableField(fill = FieldFill.INSERT)
+ private LocalDateTime createTime;
+
+ /**
+ * Update time, filled on insert and update
+ */
+ @TableField(fill = FieldFill.INSERT_UPDATE)
+ private LocalDateTime updateTime;
+
+ @TableField(exist = false)
+ private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java b/src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java
new file mode 100644
index 0000000..4f60a67
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/PdfAnalysisOutput.java
@@ -0,0 +1,60 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Row of the pdf_analysis_output table: one text paragraph or table of a PDF page
+ * @TableName pdf_analysis_output
+ */
+@TableName(value ="pdf_analysis_output")
+@Data
+public class PdfAnalysisOutput implements Serializable {
+    /** Primary key */
+    @TableId
+    private Integer id;
+
+    /**
+     * Layout type: 0 = text, 1 = table
+     */
+    private Integer layoutType;
+
+    /**
+     * Content of the PDF paragraph
+     */
+    private String content;
+
+    /**
+     * PDF page number
+     */
+    private Integer pageNo;
+
+    /**
+     * Primary key of the pdf_info table
+     */
+    private Integer pdfId;
+
+    /**
+     * Table title
+     */
+    private String tableTitle;
+
+    /**
+     * Position of the content within the PDF page; a smaller value comes first.
+     * ORDER is a reserved word in PostgreSQL, so the column name is quoted explicitly.
+     */
+    @TableField("\"order\"")
+    private Integer order;
+
+    /**
+     * Creation time, filled on insert
+     */
+    @TableField(fill = FieldFill.INSERT)
+    private LocalDateTime createTime;
+
+    @TableField(exist = false)
+    private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java b/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java
new file mode 100644
index 0000000..e094002
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/PdfInfo.java
@@ -0,0 +1,39 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * PDF file information
+ * (row of the pdf_info table)
+ * @TableName pdf_info
+ */
+@TableName(value ="pdf_info")
+@Data
+public class PdfInfo implements Serializable {
+    /** Primary key; marked explicitly like the primary keys of the other entities in this package */
+    @TableId
+    private Integer id;
+
+    /**
+     * Path of the PDF
+     */
+    private String path;
+
+    /**
+     * File name
+     */
+    private String filename;
+
+    /**
+     * Creation time, filled on insert
+     */
+    @TableField(fill = FieldFill.INSERT)
+    private LocalDateTime createTime;
+
+    @TableField(exist = false)
+    private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java
new file mode 100644
index 0000000..6d2fb49
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationEntityExtraction.java
@@ -0,0 +1,50 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Entity extracted from a document fragment (truncation)
+ * @TableName truncation_entity_extraction
+ */
+@TableName(value ="truncation_entity_extraction")
+@Data
+public class TruncationEntityExtraction implements Serializable {
+ /**
+ * Primary key
+ */
+ @TableId
+ private String id;
+
+ /**
+ * Fragment id (id of the document_truncation table)
+ */
+ private String truncationId;
+
+ /**
+ * Label (entity type)
+ */
+ private String entity;
+
+ /**
+ * Entity name
+ */
+ private String name;
+
+ /**
+ * Creation time, filled on insert
+ */
+ @TableField(fill = FieldFill.INSERT)
+ private LocalDateTime createTime;
+
+ /**
+ * Update time, filled on insert and update
+ */
+ @TableField(fill = FieldFill.INSERT_UPDATE)
+ private LocalDateTime updateTime;
+
+ @TableField(exist = false)
+ private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java
new file mode 100644
index 0000000..ced3840
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationErAttribute.java
@@ -0,0 +1,61 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Attribute of an extracted entity or relation (the original comment read "entity table")
+ * @TableName truncation_er_attribute
+ */
+@TableName(value ="truncation_er_attribute")
+@Data
+public class TruncationErAttribute implements Serializable {
+ /**
+ * Primary key
+ */
+ @TableId
+ private String id;
+
+ /**
+ * Owner id: either an id of truncation_entity_extraction or an id of truncation_relation_extraction
+ */
+ private String terId;
+
+ /**
+ * Type: 0 = terId refers to an entity, 1 = terId refers to a relation
+ */
+ private String type;
+
+ /**
+ * Attribute name (the original comment read "entity name")
+ */
+ private String attribute;
+
+ /**
+ * Value
+ */
+ private String value;
+
+ /**
+ * Data type: 0 = string, 1 = number
+ */
+ private String dataType;
+
+ /**
+ * Creation time, filled on insert
+ */
+ @TableField(fill = FieldFill.INSERT)
+ private LocalDateTime createTime;
+
+ /**
+ * Update time, filled on insert and update
+ */
+ @TableField(fill = FieldFill.INSERT_UPDATE)
+ private LocalDateTime updateTime;
+
+ @TableField(exist = false)
+ private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java b/src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java
new file mode 100644
index 0000000..9471d89
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/domain/TruncationRelationExtraction.java
@@ -0,0 +1,65 @@
+package com.supervision.pdfqaserver.domain;
+
+import com.baomidou.mybatisplus.annotation.*;
+import java.io.Serializable;
+import java.time.LocalDateTime;
+import lombok.Data;
+
+/**
+ * Relation extracted from a document fragment (truncation)
+ * @TableName truncation_relation_extraction
+ */
+@TableName(value ="truncation_relation_extraction")
+@Data
+public class TruncationRelationExtraction implements Serializable {
+ /**
+ * Primary key
+ */
+ @TableId
+ private String id;
+
+ /**
+ * Fragment id — presumably the id of the document_truncation table; TODO confirm
+ */
+ private String truncationId;
+
+ /**
+ * Head node
+ */
+ private String source;
+
+ /**
+ * Head node type
+ */
+ private String sourceType;
+
+ /**
+ * Tail node
+ */
+ private String target;
+
+ /**
+ * Tail node type
+ */
+ private String targetType;
+
+ /**
+ * Relation
+ */
+ private String relation;
+
+ /**
+ * Creation time, filled on insert
+ */
+ @TableField(fill = FieldFill.INSERT)
+ private LocalDateTime createTime;
+
+ /**
+ * Update time, filled on insert and update
+ */
+ @TableField(fill = FieldFill.INSERT_UPDATE)
+ private LocalDateTime updateTime;
+
+ @TableField(exist = false)
+ private static final long serialVersionUID = 1L;
+}
\ No newline at end of file
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java
index deb4096..c3af439 100644
--- a/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java
+++ b/src/main/java/com/supervision/pdfqaserver/dto/DocumentDTO.java
@@ -1,5 +1,6 @@
package com.supervision.pdfqaserver.dto;
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
import lombok.Data;
/**
@@ -7,19 +8,29 @@ import lombok.Data;
*/
@Data
public class DocumentDTO {
+
/**
* 文档id
*/
private String id;
+
+ private Integer documentId;
+
+ private Integer sectionId;
+
+ private Integer pageNo;
+
/**
- * 文档序号
+ * 内容类型 0:文本 1:表格
*/
- private Integer index;
+ private String layoutType;
/**
- * 内容类型 0:文本 1:表格
+ * 内容在pdf页面中的顺序,越小表示顺序越靠前
*/
- private String type;
+ private Integer layoutOrder;
+
+ private String title;
/**
* 文档内容
@@ -32,10 +43,21 @@ public class DocumentDTO {
private Integer pageNum;
- /**
- * 文件名
- */
- private String fileName;
+ public DocumentDTO() {
+ }
+
+ public DocumentDTO(PdfAnalysisOutput pdfAnalysisOutput) { // builds the DTO from a pdf_analysis_output row
+ this.sectionId = pdfAnalysisOutput.getId();
+ this.documentId = pdfAnalysisOutput.getPdfId();
+ if (null != pdfAnalysisOutput.getLayoutType()) { // the entity stores an Integer, this DTO a String
+ this.layoutType = pdfAnalysisOutput.getLayoutType().toString();
+ }
+ this.pageNo = pdfAnalysisOutput.getPageNo();
+ this.title = pdfAnalysisOutput.getTableTitle();
+ this.content = pdfAnalysisOutput.getContent();
+ this.layoutOrder = pdfAnalysisOutput.getOrder();
+
+ }
}
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java
new file mode 100644
index 0000000..bf275f3
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/dto/DomainMetadataDTO.java
@@ -0,0 +1,35 @@
+package com.supervision.pdfqaserver.dto;
+
+import lombok.Data;
+
+@Data
+public class DomainMetadataDTO {
+
+ private String id;
+
+ /**
+ * Domain type
+ */
+ private String domainType;
+
+ /**
+ * Head node type
+ */
+ private String sourceType;
+
+ /**
+ * Relation
+ */
+ private String relation;
+
+ /**
+ * Tail node type
+ */
+ private String targetType;
+
+ /**
+ * Data source: 0 = entered manually, 1 = generated by the system
+ */
+ private String generationType;
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java
new file mode 100644
index 0000000..5064e7c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/dto/ERAttributeDTO.java
@@ -0,0 +1,46 @@
+package com.supervision.pdfqaserver.dto;
+
+import lombok.Data;
+
+/**
+ * Attribute of an extracted entity or relation
+ */
+@Data
+public class ERAttributeDTO {
+
+ private String id;
+
+ /**
+ * Owner id: either an id of truncation_entity_extraction or an id of truncation_relation_extraction
+ */
+ private String terId;
+
+ /**
+ * Type: 0 = terId refers to an entity, 1 = terId refers to a relation
+ */
+ private String type;
+
+ /**
+ * Attribute name
+ */
+ private String attribute;
+
+ /**
+ * Attribute value
+ */
+ private String value;
+
+ /**
+ * Data type: 0 = string, 1 = number
+ */
+ private String dataType;
+
+ public ERAttributeDTO() {
+ }
+
+ public ERAttributeDTO(String attribute, String value, String dataType) { // id, terId and type stay unset
+ this.attribute = attribute;
+ this.value = value;
+ this.dataType = dataType;
+ }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java
new file mode 100644
index 0000000..6eda549
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/dto/EREDTO.java
@@ -0,0 +1,124 @@
+package com.supervision.pdfqaserver.dto;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.StrUtil;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import lombok.Data;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.*;
+
+/**
+ * Entity-relation extraction (ERE) result parsed from the JSON answer of the LLM.
+ */
+@Slf4j
+@Data
+public class EREDTO {
+
+    private List<EntityExtractionDTO> entities; // extracted entities (graph nodes)
+
+    private List<RelationExtractionDTO> relations; // extracted relations between the entities
+
+    public EREDTO() {
+    }
+
+    /**
+     * Parses the answer of the text prompt ({@code nodes} / {@code relations} schema).
+     * Nodes and relations are kept even when they carry no attributes. A relation is skipped
+     * (with a warning) when its source or target is empty or does not match the name of a node.
+     * @param json         JSON object returned by the model; null or empty input yields an empty result
+     * @param truncationId id of the fragment the text came from, copied onto every entity and relation
+     * @return the parsed result; both lists are non-null
+     */
+    public static EREDTO fromTextJson(String json,String truncationId) {
+        EREDTO eredto = new EREDTO();
+        List<EntityExtractionDTO> entities = new ArrayList<>();
+        List<RelationExtractionDTO> relationsList = new ArrayList<>();
+        eredto.setEntities(entities);
+        eredto.setRelations(relationsList);
+        JSONObject jsonObject = JSONObject.parseObject(json);
+        if (null == jsonObject) {
+            return eredto;
+        }
+        JSONArray nodes = jsonObject.getJSONArray("nodes");
+        if (CollUtil.isNotEmpty(nodes)) {
+            for (Object node : nodes) {
+                JSONObject nodeJson = (JSONObject) node;
+                String name = nodeJson.getString("name");
+                String type = nodeJson.getString("type");
+                // constructor order is (truncationId, entity = label/type, name, attributes)
+                entities.add(new EntityExtractionDTO(truncationId, type, name, parseAttributes(nodeJson.getJSONObject("attributes"))));
+            }
+        }
+        JSONArray relations = jsonObject.getJSONArray("relations");
+        if (CollUtil.isEmpty(relations)) {
+            return eredto;
+        }
+        for (Object relation : relations) {
+            JSONObject relationJson = (JSONObject) relation;
+            String source = relationJson.getString("source");
+            String target = relationJson.getString("target");
+            String type = relationJson.getString("type");
+            if (StrUtil.isEmpty(source) || StrUtil.isEmpty(target)) {
+                log.warn("truncationId:{} relation:{} 关系中source or target is empty",truncationId,relationJson);
+                continue;
+            }
+            // source/target hold node names, so they are resolved against the entity name
+            Optional<EntityExtractionDTO> sourceOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), source)).findFirst();
+            if (sourceOpt.isEmpty()) {
+                log.warn("truncationId:{} relation:{} 关系中source在实体中不存在",truncationId,relationJson);
+                continue;
+            }
+            Optional<EntityExtractionDTO> targetOpt = entities.stream().filter(e -> StrUtil.equals(e.getName(), target)).findFirst();
+            if (targetOpt.isEmpty()) {
+                log.warn("truncationId:{} relation:{} 关系中target在实体中不存在",truncationId,relationJson);
+                continue;
+            }
+            relationsList.add(new RelationExtractionDTO(truncationId, source, sourceOpt.get().getEntity(), type,
+                    target, targetOpt.get().getEntity(), parseAttributes(relationJson.getJSONObject("attributes"))));
+        }
+        return eredto;
+    }
+
+    /**
+     * Parses the answer of the table prompt ({@code table_data} schema): every non-empty row object
+     * becomes one entity labelled and named {@code "row"} whose attributes are the cells of the row.
+     * @param json         JSON object returned by the model; null or empty input yields an empty result
+     * @param truncationId id of the fragment the table came from, copied onto every entity
+     * @return the parsed result; both lists are non-null and relations is always empty
+     */
+    public static EREDTO fromTableJson(String json,String truncationId) {
+        EREDTO eredto = new EREDTO();
+        List<EntityExtractionDTO> entities = new ArrayList<>();
+        eredto.setEntities(entities);
+        eredto.setRelations(new ArrayList<>());
+        JSONObject jsonObject = JSONObject.parseObject(json);
+        JSONArray tables = null == jsonObject ? null : jsonObject.getJSONArray("table_data");
+        if (CollUtil.isEmpty(tables)) {
+            return eredto;
+        }
+        for (Object table : tables) {
+            JSONObject tableJson = (JSONObject) table;
+            if (CollUtil.isEmpty(tableJson)) {
+                continue;
+            }
+            entities.add(new EntityExtractionDTO(truncationId, "row", "row", parseAttributes(tableJson)));
+        }
+        return eredto;
+    }
+
+    /** Converts a JSON object into attribute DTOs; dataType is "1" for numeric values, otherwise "0". */
+    private static List<ERAttributeDTO> parseAttributes(JSONObject attributes) {
+        List<ERAttributeDTO> result = new ArrayList<>();
+        if (CollUtil.isEmpty(attributes)) {
+            return result;
+        }
+        for (Map.Entry<String, Object> entry : attributes.entrySet()) {
+            Object value = entry.getValue();
+            String text = null == value ? null : value.toString(); // a JSON null stays null instead of throwing
+            result.add(new ERAttributeDTO(entry.getKey(), text, value instanceof Number ? "1" : "0"));
+        }
+        return result;
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java
new file mode 100644
index 0000000..30cb118
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/dto/EntityExtractionDTO.java
@@ -0,0 +1,37 @@
+package com.supervision.pdfqaserver.dto;
+
+import lombok.Data;
+import java.util.List;
+
+/**
+ * Extracted entity
+ */
+@Data
+public class EntityExtractionDTO {
+
+ private String id;
+
+ private String truncationId;
+
+ /**
+ * Entity label
+ */
+ private String entity;
+
+ /**
+ * Entity name
+ */
+ private String name;
+
+ private List attributes;
+
+ public EntityExtractionDTO() {
+ }
+
+ public EntityExtractionDTO(String truncationId, String entity, String name, List attributes) { // note the order: label first, then name
+ this.truncationId = truncationId;
+ this.entity = entity;
+ this.name = name;
+ this.attributes = attributes;
+ }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java
new file mode 100644
index 0000000..1b35063
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/dto/RelationExtractionDTO.java
@@ -0,0 +1,55 @@
+package com.supervision.pdfqaserver.dto;
+
+import lombok.Data;
+import java.util.List;
+
+/**
+ * Extracted relation
+ */
+@Data
+public class RelationExtractionDTO {
+
+ private String id;
+
+ private String truncationId;
+
+ /**
+ * Head node data
+ */
+ private String source;
+
+ /**
+ * Head node type
+ */
+ private String sourceType;
+
+ /**
+ * Relation
+ */
+ private String relation;
+
+ /**
+ * Tail node data
+ */
+ private String target;
+
+ /**
+ * Tail node type
+ */
+ private String targetType;
+
+ private List attributes;
+
+ public RelationExtractionDTO() {
+ }
+
+ public RelationExtractionDTO(String truncationId,String source, String sourceType,String relation, String target,String targetType, List attributes) { // id stays unset
+ this.truncationId = truncationId;
+ this.source = source;
+ this.relation = relation;
+ this.target = target;
+ this.attributes = attributes;
+ this.sourceType = sourceType;
+ this.targetType = targetType;
+ }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java b/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java
index 1c27e52..db4a5d2 100644
--- a/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java
+++ b/src/main/java/com/supervision/pdfqaserver/dto/TruncateDTO.java
@@ -1,5 +1,6 @@
package com.supervision.pdfqaserver.dto;
+import com.supervision.pdfqaserver.domain.DocumentTruncation;
import lombok.Data;
/**
@@ -14,19 +15,43 @@ public class TruncateDTO {
private String id;
/**
- * 分段类型 0:文本 1:表格
+ * 布局类型 0-文本 1-表格
*/
- private String type;
+ private String layoutType;
/**
- * 分段内容
+ * 文档id(pdf_info表的id)
*/
- private String content;
+ private Integer documentId;
+
+ /**
+ * 段落id pdf_analysis_output表的id
+ */
+ private String sectionId;
/**
* 表格标题
*/
private String title;
+ /**
+ * 分段内容
+ */
+ private String content;
+
+
+    /**
+     * Builds a new {@link DocumentTruncation} from this DTO's document id, section id,
+     * layout type, title and content; the {@code id} field is not copied.
+     */
+    public DocumentTruncation toDocumentTruncation() {
+        DocumentTruncation entity = new DocumentTruncation();
+        entity.setContent(this.content);
+        entity.setTitle(this.title);
+        entity.setLayoutType(this.layoutType);
+        entity.setSectionId(this.sectionId);
+        entity.setDocumentId(this.documentId);
+        return entity;
+    }
}
diff --git a/src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java b/src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java
new file mode 100644
index 0000000..ff1654d
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/exception/BusinessException.java
@@ -0,0 +1,76 @@
+/*
+ * File name: BusinessException
+ * Copyright:
+ * Description: <description>
+ * Modified by: RedName
+ * Modified on: 2022/8/5
+ * Tracking no.: <tracking number>
+ * Change no.: <change number>
+ * Change notes: <change notes>
+ */
+package com.supervision.pdfqaserver.exception;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.http.HttpStatus;
+
+/**
+ * Unchecked exception for business failures, carrying a numeric error code alongside
+ * the message.
+ *
+ * <p>Constructors without an explicit code use HTTP 500 ({@code INTERNAL_SERVER_ERROR}).
+ * Message and cause are stored by {@link RuntimeException} itself.
+ *
+ * @author ljt
+ * @version 2022/8/5
+ */
+@Slf4j
+public class BusinessException extends RuntimeException {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Error code associated with this failure. */
+    private final Integer code;
+
+    /**
+     * Wraps {@code cause}; the message defaults to {@code cause.toString()} instead of
+     * {@code null}, so logs and error responses are not left without a description.
+     */
+    public BusinessException(Throwable cause) {
+        super(cause);
+        this.code = HttpStatus.INTERNAL_SERVER_ERROR.value();
+    }
+
+    /** Wraps {@code cause} with an explicit {@code message} and code 500. */
+    public BusinessException(Throwable cause, String message) {
+        super(message, cause);
+        this.code = HttpStatus.INTERNAL_SERVER_ERROR.value();
+    }
+
+    /** Creates an exception with {@code message} and code 500. */
+    public BusinessException(String message) {
+        super(message);
+        this.code = HttpStatus.INTERNAL_SERVER_ERROR.value();
+    }
+
+    /** Creates an exception with {@code message} and a caller-supplied {@code code}. */
+    public BusinessException(String message, Integer code) {
+        super(message);
+        this.code = code;
+    }
+
+    /**
+     * Wraps {@code e} with {@code message} and code 500, logging both at error level.
+     * NOTE(review): logging inside a constructor duplicates output when the handler logs
+     * too; kept so existing log output does not change.
+     */
+    public BusinessException(String message, Throwable e) {
+        super(message, e);
+        log.error(message, e);
+        this.code = HttpStatus.INTERNAL_SERVER_ERROR.value();
+    }
+
+    /** Returns the error code associated with this failure. */
+    public Integer getCode() {
+        return code;
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java
new file mode 100644
index 0000000..e477948
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/ChineseEnglishWordsMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table chinese_english_words (Chinese-English dictionary)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.ChineseEnglishWords
+ */
+public interface ChineseEnglishWordsMapper extends BaseMapper<ChineseEnglishWords> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java
new file mode 100644
index 0000000..92c228c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/DocumentTruncationMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.DocumentTruncation;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table document_truncation (document truncation table)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.DocumentTruncation
+ */
+public interface DocumentTruncationMapper extends BaseMapper<DocumentTruncation> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java
new file mode 100644
index 0000000..a48a7ee
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/DomainMetadataMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.DomainMetadata;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table domain_metadata (domain metadata)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.DomainMetadata
+ */
+public interface DomainMetadataMapper extends BaseMapper<DomainMetadata> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java
new file mode 100644
index 0000000..f2a1f49
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/PdfAnalysisOutputMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table pdf_analysis_output
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.PdfAnalysisOutput
+ */
+public interface PdfAnalysisOutputMapper extends BaseMapper<PdfAnalysisOutput> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java
new file mode 100644
index 0000000..acf6944
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/PdfInfoMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.PdfInfo;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table pdf_info (PDF information)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.PdfInfo
+ */
+public interface PdfInfoMapper extends BaseMapper<PdfInfo> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java
new file mode 100644
index 0000000..e451a27
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationEntityExtractionMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table truncation_entity_extraction (truncation entity extraction)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.TruncationEntityExtraction
+ */
+public interface TruncationEntityExtractionMapper extends BaseMapper<TruncationEntityExtraction> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java
new file mode 100644
index 0000000..90483be
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationErAttributeMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.TruncationErAttribute;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table truncation_er_attribute (entity table)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.TruncationErAttribute
+ */
+public interface TruncationErAttributeMapper extends BaseMapper<TruncationErAttribute> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java
new file mode 100644
index 0000000..7ccc5c5
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/mapper/TruncationRelationExtractionMapper.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.mapper;
+
+import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+
+/**
+ * @author Administrator
+ * @description Database mapper for table truncation_relation_extraction (truncation relation extraction)
+ * @createDate 2025-04-27 11:45:24
+ * @Entity com.supervision.pdfqaserver.domain.TruncationRelationExtraction
+ */
+public interface TruncationRelationExtractionMapper extends BaseMapper<TruncationRelationExtraction> {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java b/src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java
new file mode 100644
index 0000000..3e25515
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/ChineseEnglishWordsService.java
@@ -0,0 +1,13 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * @author Administrator
+ * @description Database service for table chinese_english_words (Chinese-English dictionary)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface ChineseEnglishWordsService extends IService<ChineseEnglishWords> {
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java b/src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java
deleted file mode 100644
index 87916fa..0000000
--- a/src/main/java/com/supervision/pdfqaserver/service/DocumentSlicer.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package com.supervision.pdfqaserver.service;
-
-import com.supervision.pdfqaserver.dto.DocumentDTO;
-import com.supervision.pdfqaserver.dto.TruncateDTO;
-
-import java.util.List;
-
-/**
- * 文档切分器
- */
-public interface DocumentSlicer {
-
- /**
- * 切分文档
- * @param documents 文档列表
- * @return
- */
- List slice(List documents);
-}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java b/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java
new file mode 100644
index 0000000..21be737
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/DocumentTruncationService.java
@@ -0,0 +1,18 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.DocumentTruncation;
+import com.baomidou.mybatisplus.extension.service.IService;
+import com.supervision.pdfqaserver.dto.TruncateDTO;
+
+import java.util.List;
+
+/**
+ * @author Administrator
+ * @description Database service for table document_truncation (document truncation table)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface DocumentTruncationService extends IService<DocumentTruncation> {
+
+    /** Converts each DTO to a {@link DocumentTruncation} and persists it. */
+    void batchSave(List<TruncateDTO> truncateDTOS);
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java
new file mode 100644
index 0000000..c1f5e1b
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/DomainMetadataService.java
@@ -0,0 +1,13 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.DomainMetadata;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * @author Administrator
+ * @description Database service for table domain_metadata (domain metadata)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface DomainMetadataService extends IService<DomainMetadata> {
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java b/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java
new file mode 100644
index 0000000..62993ad
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/KnowledgeGraphService.java
@@ -0,0 +1,21 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.dto.EREDTO;
+
+/**
+ * Knowledge graph service interface.
+ */
+public interface KnowledgeGraphService {
+
+ /**
+ * Generates the knowledge graph for a document.
+ * @param documentId document ID
+ */
+ void generateGraph(String documentId);
+
+ /** Intended to query the knowledge graph; declared void, so no result reaches the caller. */
+ void queryGraph(String databaseId, String query);
+
+ /** Saves an entity-relation extraction result together with the given truncation id. */
+ void saveERE(EREDTO eredto, String truncationId);
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java b/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java
new file mode 100644
index 0000000..f534c5f
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/PdfAnalysisOutputService.java
@@ -0,0 +1,16 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+import java.util.List;
+
+/**
+ * @author Administrator
+ * @description Database service for table pdf_analysis_output
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface PdfAnalysisOutputService extends IService<PdfAnalysisOutput> {
+    /** Returns the analysis output rows that belong to the PDF identified by {@code pdfId}. */
+    List<PdfAnalysisOutput> queryByPdfId(String pdfId);
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java b/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java
new file mode 100644
index 0000000..e5ef730
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/PdfInfoService.java
@@ -0,0 +1,13 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.PdfInfo;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * @author Administrator
+ * @description Database service for table pdf_info (PDF information)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface PdfInfoService extends IService<PdfInfo> {
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java
new file mode 100644
index 0000000..094f16f
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/TripleConversionPipeline.java
@@ -0,0 +1,36 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.dto.EREDTO;
+import com.supervision.pdfqaserver.dto.DocumentDTO;
+import com.supervision.pdfqaserver.dto.TruncateDTO;
+
+import java.util.List;
+
+/**
+ * Pipeline that converts documents into triples (entity-relation extraction results).
+ */
+public interface TripleConversionPipeline {
+
+    /**
+     * Slices documents into truncations.
+     * @param documents documents to slice
+     * @return the truncations produced from {@code documents}
+     */
+    List<TruncateDTO> sliceDocuments(List<DocumentDTO> documents);
+
+    /**
+     * Runs entity-relation extraction on one truncation.
+     *
+     * @param truncateDTO truncation to process
+     * @return the extraction result
+     */
+    EREDTO doEre(TruncateDTO truncateDTO);
+
+    /**
+     * Merges entity-relation extraction results.
+     *
+     * @param eredtoList extraction results to merge
+     * @return the merged results
+     */
+    List<EREDTO> mergeEreResults(List<EREDTO> eredtoList);
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/TripleToCypherExecutor.java b/src/main/java/com/supervision/pdfqaserver/service/TripleToCypherExecutor.java
new file mode 100644
index 0000000..d48050d
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/TripleToCypherExecutor.java
@@ -0,0 +1,31 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.dto.EREDTO;
+
+/**
+ * Executor that converts triples into Cypher statements.
+ */
+public interface TripleToCypherExecutor {
+
+ /**
+ * Generates the insert Cypher statement for an extraction result.
+ * @param eredto entity-relation extraction result
+ * @return the Cypher statement
+ */
+ String generateInsertCypher(EREDTO eredto);
+
+
+ /**
+ * Generates a query Cypher statement.
+ * @param query query input (NOTE(review): expected format is not specified here; confirm with callers)
+ * @return the Cypher statement
+ */
+ String generateQueryCypher(String query);
+
+ /**
+ * Executes a Cypher statement.
+ * The method is declared void, so nothing is returned to the caller.
+ * @param cypher Cypher statement to execute
+ */
+ void executeCypher(String cypher);
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/TruncationEntityExtractionService.java b/src/main/java/com/supervision/pdfqaserver/service/TruncationEntityExtractionService.java
new file mode 100644
index 0000000..99f4ecc
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/TruncationEntityExtractionService.java
@@ -0,0 +1,13 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * @author Administrator
+ * @description Database service for table truncation_entity_extraction (truncation entity extraction)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface TruncationEntityExtractionService extends IService<TruncationEntityExtraction> {
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/TruncationErAttributeService.java b/src/main/java/com/supervision/pdfqaserver/service/TruncationErAttributeService.java
new file mode 100644
index 0000000..35880b4
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/TruncationErAttributeService.java
@@ -0,0 +1,13 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.TruncationErAttribute;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * @author Administrator
+ * @description Database service for table truncation_er_attribute (entity table)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface TruncationErAttributeService extends IService<TruncationErAttribute> {
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java b/src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java
new file mode 100644
index 0000000..666e7b7
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/TruncationRelationExtractionService.java
@@ -0,0 +1,13 @@
+package com.supervision.pdfqaserver.service;
+
+import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * @author Administrator
+ * @description Database service for table truncation_relation_extraction (truncation relation extraction)
+ * @createDate 2025-04-27 11:45:24
+ */
+public interface TruncationRelationExtractionService extends IService<TruncationRelationExtraction> {
+
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java
new file mode 100644
index 0000000..57106ef
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/ChineseEnglishWordsServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.ChineseEnglishWords;
+import com.supervision.pdfqaserver.service.ChineseEnglishWordsService;
+import com.supervision.pdfqaserver.mapper.ChineseEnglishWordsMapper;
+import org.springframework.stereotype.Service;
+
+/**
+ * @author Administrator
+ * @description Database service implementation for table chinese_english_words (Chinese-English dictionary)
+ * @createDate 2025-04-27 11:45:24
+ */
+@Service
+public class ChineseEnglishWordsServiceImpl extends ServiceImpl<ChineseEnglishWordsMapper, ChineseEnglishWords>
+        implements ChineseEnglishWordsService {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java
new file mode 100644
index 0000000..390b2f4
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DocumentTruncationServiceImpl.java
@@ -0,0 +1,33 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.collection.CollUtil;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.DocumentTruncation;
+import com.supervision.pdfqaserver.dto.TruncateDTO;
+import com.supervision.pdfqaserver.service.DocumentTruncationService;
+import com.supervision.pdfqaserver.mapper.DocumentTruncationMapper;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+
+/**
+ * @author Administrator
+ * @description Database service implementation for table document_truncation (document truncation table)
+ * @createDate 2025-04-27 11:45:24
+ */
+@Service
+public class DocumentTruncationServiceImpl extends ServiceImpl<DocumentTruncationMapper, DocumentTruncation>
+        implements DocumentTruncationService {
+    /** Converts the DTOs to entities and inserts them in one batch; a null or empty list is a no-op. */
+    @Override
+    public void batchSave(List<TruncateDTO> truncateDTOS) {
+        if (CollUtil.isEmpty(truncateDTOS)) {
+            return;
+        }
+        this.saveBatch(truncateDTOS.stream().map(TruncateDTO::toDocumentTruncation).toList());
+    }
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java
new file mode 100644
index 0000000..d246477
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/DomainMetadataServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.DomainMetadata;
+import com.supervision.pdfqaserver.service.DomainMetadataService;
+import com.supervision.pdfqaserver.mapper.DomainMetadataMapper;
+import org.springframework.stereotype.Service;
+
+/**
+ * @author Administrator
+ * @description Database service implementation for table domain_metadata (domain metadata)
+ * @createDate 2025-04-27 11:45:24
+ */
+@Service
+public class DomainMetadataServiceImpl extends ServiceImpl<DomainMetadataMapper, DomainMetadata>
+        implements DomainMetadataService {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java
new file mode 100644
index 0000000..06364c7
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/KnowledgeGraphServiceImpl.java
@@ -0,0 +1,84 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.collection.CollUtil;
+import com.supervision.pdfqaserver.dto.EREDTO;
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
+import com.supervision.pdfqaserver.dto.DocumentDTO;
+import com.supervision.pdfqaserver.dto.TruncateDTO;
+import com.supervision.pdfqaserver.service.*;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Orchestrates knowledge-graph generation: slice, extract, merge, then write Cypher. */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
+
+    private final TripleConversionPipeline tripleConversionPipeline;
+    private final TripleToCypherExecutor tripleToCypherExecutor;
+    private final ChineseEnglishWordsService chineseEnglishWordsService;
+    private final DocumentTruncationService documentTruncationService;
+    private final DomainMetadataService domainMetadataService;
+    private final PdfAnalysisOutputService pdfAnalysisOutputService;
+    private final PdfInfoService pdfInfoService;
+    private final TruncationEntityExtractionService truncationEntityExtractionService;
+    private final TruncationRelationExtractionService relationExtractionService;
+    private final TruncationErAttributeService truncationErAttributeService;
+
+    /**
+     * Generates the knowledge graph for one document.
+     * @param documentId id used to look up the document's PDF analysis output
+     */
+    @Override
+    public void generateGraph(String documentId) {
+        List<PdfAnalysisOutput> pdfAnalysisOutputs = pdfAnalysisOutputService.queryByPdfId(documentId);
+        if (CollUtil.isEmpty(pdfAnalysisOutputs)) {
+            log.info("没有找到pdfId为{}的pdf分析结果", documentId);
+            return;
+        }
+        List<DocumentDTO> documentDTOList = pdfAnalysisOutputs.stream().map(DocumentDTO::new).toList();
+        // Slice the documents; when the pipeline yields nothing there is no work left to do.
+        List<TruncateDTO> truncateDTOS = tripleConversionPipeline.sliceDocuments(documentDTOList);
+        if (CollUtil.isEmpty(truncateDTOS)) {
+            log.info("generateGraph: no truncations produced for documentId {}", documentId);
+            return;
+        }
+        // Persist the truncations.
+        documentTruncationService.batchSave(truncateDTOS);
+        // Extract entities and relations from every truncation.
+        // NOTE(review): truncateDTO.getId() is only meaningful if ids are assigned before this point; confirm.
+        List<EREDTO> eredtoList = new ArrayList<>();
+        for (TruncateDTO truncateDTO : truncateDTOS) {
+            EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO);
+            if (eredto == null) {
+                // doEre can return null (e.g. unsupported layout type); nothing to save or merge.
+                continue;
+            }
+            this.saveERE(eredto, truncateDTO.getId());
+            eredtoList.add(eredto);
+        }
+
+        // Merge the extraction results, then write each merged result to the graph.
+        List<EREDTO> mergedList = tripleConversionPipeline.mergeEreResults(eredtoList);
+        if (CollUtil.isEmpty(mergedList)) {
+            return;
+        }
+        for (EREDTO eredto : mergedList) {
+            String insertCypher = tripleToCypherExecutor.generateInsertCypher(eredto);
+            tripleToCypherExecutor.executeCypher(insertCypher);
+        }
+    }
+
+    @Override
+    public void queryGraph(String databaseId, String query) {
+    }
+
+    @Override
+    public void saveERE(EREDTO eredto, String truncationId) {
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java
new file mode 100644
index 0000000..c08f7e6
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfAnalysisOutputServiceImpl.java
@@ -0,0 +1,31 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.lang.Assert;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.PdfAnalysisOutput;
+import com.supervision.pdfqaserver.service.PdfAnalysisOutputService;
+import com.supervision.pdfqaserver.mapper.PdfAnalysisOutputMapper;
+import org.springframework.stereotype.Service;
+
+import java.util.List;
+
+/**
+ * @author Administrator
+ * @description Database service implementation for table pdf_analysis_output
+ * @createDate 2025-04-27 11:45:24
+ */
+@Service
+public class PdfAnalysisOutputServiceImpl extends ServiceImpl<PdfAnalysisOutputMapper, PdfAnalysisOutput>
+        implements PdfAnalysisOutputService {
+
+    /** Returns the rows whose pdfId equals {@code pdfId}; an empty {@code pdfId} is rejected by the assertion. */
+    @Override
+    public List<PdfAnalysisOutput> queryByPdfId(String pdfId) {
+        Assert.notEmpty(pdfId, "pdfId不能为空");
+        return super.lambdaQuery().eq(PdfAnalysisOutput::getPdfId, pdfId).list();
+    }
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java
new file mode 100644
index 0000000..600d857
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/PdfInfoServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.PdfInfo;
+import com.supervision.pdfqaserver.service.PdfInfoService;
+import com.supervision.pdfqaserver.mapper.PdfInfoMapper;
+import org.springframework.stereotype.Service;
+
+/**
+ * @author Administrator
+ * @description Database service implementation for table pdf_info (PDF information)
+ * @createDate 2025-04-27 11:45:24
+ */
+@Service
+public class PdfInfoServiceImpl extends ServiceImpl<PdfInfoMapper, PdfInfo>
+        implements PdfInfoService {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
new file mode 100644
index 0000000..57899ca
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleConversionPipelineImpl.java
@@ -0,0 +1,105 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.StrUtil;
+import com.supervision.pdfqaserver.cache.PromptCache;
+import com.supervision.pdfqaserver.dto.*;
+import com.supervision.pdfqaserver.service.TripleConversionPipeline;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.ollama.OllamaChatModel;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.List;
+/**
+ * Default {@link TripleConversionPipeline}: slices documents, runs LLM-based
+ * entity-relation extraction per truncation and merges the results.
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class TripleConversionPipelineImpl implements TripleConversionPipeline {
+
+    /** Layout type value of a text truncation. */
+    private static final String LAYOUT_TEXT = "0";
+
+    /** Layout type value of a table truncation. */
+    private static final String LAYOUT_TABLE = "1";
+
+    private final OllamaChatModel ollamaChatModel;
+
+    /**
+     * Slices documents into truncations.
+     * TODO: slicing is not implemented yet; the documents are only ordered and an empty
+     * list is returned (instead of {@code null}) so callers can iterate the result safely.
+     *
+     * @param documents documents to slice; may be null or empty
+     * @return the truncations, never {@code null}
+     */
+    @Override
+    public List<TruncateDTO> sliceDocuments(List<DocumentDTO> documents) {
+        List<TruncateDTO> truncations = new ArrayList<>();
+        if (CollUtil.isEmpty(documents)) {
+            return truncations;
+        }
+        // Order by page number first, then by layout order within the page.
+        List<DocumentDTO> ordered = documents.stream().sorted((a, b) -> {
+            int byPage = Integer.compare(a.getPageNo(), b.getPageNo());
+            return byPage != 0 ? byPage : Integer.compare(a.getLayoutOrder(), b.getLayoutOrder());
+        }).toList();
+        log.debug("sliceDocuments: {} documents ordered, slicing not implemented", ordered.size());
+        return truncations;
+    }
+
+    /**
+     * Runs entity-relation extraction on one truncation, choosing the prompt by layout type.
+     * @param truncateDTO truncation to process
+     * @return the extraction result, or {@code null} when the truncation is null or its
+     *         layout type is neither text nor table
+     */
+    @Override
+    public EREDTO doEre(TruncateDTO truncateDTO) {
+        if (truncateDTO == null) {
+            log.warn("doEre: truncateDTO is null");
+            return null;
+        }
+        String layoutType = truncateDTO.getLayoutType();
+        if (StrUtil.equals(layoutType, LAYOUT_TEXT)) {
+            return EREDTO.fromTextJson(callModel(PromptCache.DOERE_TEXT, truncateDTO), truncateDTO.getId());
+        }
+        if (StrUtil.equals(layoutType, LAYOUT_TABLE)) {
+            return EREDTO.fromTableJson(callModel(PromptCache.DOERE_TABLE, truncateDTO), truncateDTO.getId());
+        }
+        log.warn("doEre:错误的布局类型: {}", layoutType);
+        return null;
+    }
+
+    /**
+     * Fills the cached prompt {@code promptKey} with the truncation content and returns the
+     * raw model response. TODO: abnormal model responses are not handled yet.
+     */
+    private String callModel(String promptKey, TruncateDTO truncateDTO) {
+        String prompt = PromptCache.promptMap.get(promptKey);
+        return ollamaChatModel.call(String.format(prompt, truncateDTO.getContent()));
+    }
+
+    /**
+     * Merges entity-relation extraction results.
+     * TODO: attribute merging is not implemented yet. Intended rules: entities whose
+     * {@code entity} and {@code name} are both equal are the same entity; relations whose
+     * {@code source}, {@code target} and {@code relation} are all equal are the same relation.
+     * Until then the results are returned unmerged rather than {@code null}, so callers
+     * can iterate them without a null check.
+     *
+     * @param eredtoList extraction results to merge; may be null or empty
+     * @return a new list holding the results, never {@code null}
+     */
+    @Override
+    public List<EREDTO> mergeEreResults(List<EREDTO> eredtoList) {
+        if (CollUtil.isEmpty(eredtoList)) {
+            return new ArrayList<>();
+        }
+        return new ArrayList<>(eredtoList);
+    }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java
new file mode 100644
index 0000000..1d82afa
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TripleToCypherExecutorImpl.java
@@ -0,0 +1,30 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.supervision.pdfqaserver.dto.EREDTO;
+import com.supervision.pdfqaserver.service.TripleToCypherExecutor;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.ollama.OllamaChatModel;
+import org.springframework.stereotype.Service;
+
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class TripleToCypherExecutorImpl implements TripleToCypherExecutor {
+ // Placeholder implementation: none of the methods below does any work yet.
+ private final OllamaChatModel ollamaChatModel; // injected, not used by any method yet
+ @Override
+ public String generateInsertCypher(EREDTO eredto) {
+ return null; // no Cypher is generated yet
+ }
+
+ @Override
+ public String generateQueryCypher(String query) {
+ return null; // no Cypher is generated yet
+ }
+
+ @Override
+ public void executeCypher(String cypher) {
+ // empty body: nothing is executed yet
+ }
+}
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java
new file mode 100644
index 0000000..aafd16f
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationEntityExtractionServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.TruncationEntityExtraction;
+import com.supervision.pdfqaserver.service.TruncationEntityExtractionService;
+import com.supervision.pdfqaserver.mapper.TruncationEntityExtractionMapper;
+import org.springframework.stereotype.Service;
+
+/**
+ * @author Administrator
+ * @description Database service implementation for table truncation_entity_extraction (truncation entity extraction)
+ * @createDate 2025-04-27 11:45:24
+ */
+@Service
+public class TruncationEntityExtractionServiceImpl extends ServiceImpl<TruncationEntityExtractionMapper, TruncationEntityExtraction>
+        implements TruncationEntityExtractionService {
+
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java
new file mode 100644
index 0000000..3de845c
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationErAttributeServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.TruncationErAttribute;
+import com.supervision.pdfqaserver.service.TruncationErAttributeService;
+import com.supervision.pdfqaserver.mapper.TruncationErAttributeMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* Service implementation for database operations on table truncation_er_attribute
+* (entity table); ServiceImpl's type arguments bind it to its mapper and entity.
+* @author Administrator
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class TruncationErAttributeServiceImpl extends ServiceImpl<TruncationErAttributeMapper, TruncationErAttribute>
+ implements TruncationErAttributeService{
+}
+
+
+
+
diff --git a/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java
new file mode 100644
index 0000000..5ab6692
--- /dev/null
+++ b/src/main/java/com/supervision/pdfqaserver/service/impl/TruncationRelationExtractionServiceImpl.java
@@ -0,0 +1,22 @@
+package com.supervision.pdfqaserver.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.supervision.pdfqaserver.domain.TruncationRelationExtraction;
+import com.supervision.pdfqaserver.service.TruncationRelationExtractionService;
+import com.supervision.pdfqaserver.mapper.TruncationRelationExtractionMapper;
+import org.springframework.stereotype.Service;
+
+/**
+* Service implementation for database operations on table truncation_relation_extraction
+* (segment relation extraction); ServiceImpl's type arguments bind it to its mapper and entity.
+* @author Administrator
+* @createDate 2025-04-27 11:45:24
+*/
+@Service
+public class TruncationRelationExtractionServiceImpl extends ServiceImpl<TruncationRelationExtractionMapper, TruncationRelationExtraction>
+ implements TruncationRelationExtractionService{
+}
+
+
+
+
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
index e66b0ba..17e179f 100644
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@@ -1,6 +1,16 @@
spring:
application:
name: pdf-qa-server
+ datasource:
+ druid:
+ url: jdbc:postgresql://192.168.10.137:54321/pdf-qa
+ username: postgres
+ password: 123456
+ driver-class-name: org.postgresql.Driver
+ servlet:
+ multipart:
+ max-file-size: 10MB
+ max-request-size: 100MB
ai:
ollama:
baseUrl: http://192.168.10.70:11434
diff --git a/src/main/resources/mapper/ChineseEnglishWordsMapper.xml b/src/main/resources/mapper/ChineseEnglishWordsMapper.xml
new file mode 100644
index 0000000..5b71804
--- /dev/null
+++ b/src/main/resources/mapper/ChineseEnglishWordsMapper.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ chinese_word,english_word,create_time,
+ update_time
+
+
diff --git a/src/main/resources/mapper/DocumentTruncationMapper.xml b/src/main/resources/mapper/DocumentTruncationMapper.xml
new file mode 100644
index 0000000..2440ce9
--- /dev/null
+++ b/src/main/resources/mapper/DocumentTruncationMapper.xml
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id,document_id,section_id,title,
+ layout_type,content,create_time,
+ update_time
+
+
diff --git a/src/main/resources/mapper/DomainMetadataMapper.xml b/src/main/resources/mapper/DomainMetadataMapper.xml
new file mode 100644
index 0000000..3f1d122
--- /dev/null
+++ b/src/main/resources/mapper/DomainMetadataMapper.xml
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id,domain_type,source_type,
+ relation,target_type,generation_type,
+ create_time,update_time
+
+
diff --git a/src/main/resources/mapper/PdfAnalysisOutputMapper.xml b/src/main/resources/mapper/PdfAnalysisOutputMapper.xml
new file mode 100644
index 0000000..87a9397
--- /dev/null
+++ b/src/main/resources/mapper/PdfAnalysisOutputMapper.xml
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id,layout_type,content,
+ page_no,pdf_id,table_title,
+ order,create_time
+
+
diff --git a/src/main/resources/mapper/PdfInfoMapper.xml b/src/main/resources/mapper/PdfInfoMapper.xml
new file mode 100644
index 0000000..aa23b3e
--- /dev/null
+++ b/src/main/resources/mapper/PdfInfoMapper.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ id,path,filename,
+ create_time
+
+
diff --git a/src/main/resources/mapper/TruncationEntityExtractionMapper.xml b/src/main/resources/mapper/TruncationEntityExtractionMapper.xml
new file mode 100644
index 0000000..0084953
--- /dev/null
+++ b/src/main/resources/mapper/TruncationEntityExtractionMapper.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id,truncation_id,entity,
+ name,create_time,update_time
+
+
diff --git a/src/main/resources/mapper/TruncationErAttributeMapper.xml b/src/main/resources/mapper/TruncationErAttributeMapper.xml
new file mode 100644
index 0000000..894cebe
--- /dev/null
+++ b/src/main/resources/mapper/TruncationErAttributeMapper.xml
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id,ter_id,type,
+ attribute,value,data_type,
+ create_time,update_time
+
+
diff --git a/src/main/resources/mapper/TruncationRelationExtractionMapper.xml b/src/main/resources/mapper/TruncationRelationExtractionMapper.xml
new file mode 100644
index 0000000..e779946
--- /dev/null
+++ b/src/main/resources/mapper/TruncationRelationExtractionMapper.xml
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id,truncation_id,source,
+ source_type,target,target_type,
+ relation,create_time,update_time
+
+