|
|
|
|
package com.supervision.pdfqaserver.cache;
|
|
|
|
|
|
|
|
|
|
import java.util.HashMap;
|
|
|
|
|
import java.util.Map;
|
|
|
|
|
|
|
|
|
|
/**
 * Prompt cache: a static registry of LLM prompt templates, looked up by key.
 *
 * <p>Each template is stored in {@link #promptMap} under one of the public key
 * constants declared below. Most templates end with a literal <code>{}</code> line;
 * presumably the caller substitutes the input there with a placeholder-style
 * formatter -- TODO confirm against the call sites.
 */
public class PromptCache {

    /** Key of the prompt that extracts nodes, relations and typed triplets from free text. */
    public static final String DOERE_TEXT = "DOERE_TEXT";

    /** Key of the prompt that extracts row objects from a table. */
    public static final String DOERE_TABLE = "DOERE_TABLE";

    /** Key of the Chinese-to-English prompt. */
    public static final String CHINESE_TO_ENGLISH = "CHINESE_TO_ENGLISH";

    /** Key of the prompt that converts triplets into Neo4j Cypher MERGE statements. */
    public static final String ERE_TO_INSERT_CYPHER = "ERE_TO_INSERT_CYPHER";

    // The prompt templates are declared BEFORE the map and the static initializer on
    // purpose: population of the map then follows plain textual initialization order
    // and no longer depends on these fields being inlined compile-time constants.

    /** Template for {@link #DOERE_TEXT}: entity/relation extraction with a worked example. */
    private static final String DOERE_TEXT_PROMPT = """
            你是一个高级信息抽取引擎,请从给定文本中提取以下结构化信息并以JSON格式输出:

            1. **节点提取**:
            - 识别所有实体作为节点
            - 自动推断每个节点的类型
            - 记录节点的所有相关属性(键值对形式)

            2. **关系提取**:
            - 识别所有节点间的关系
            - 自动推断关系类型
            - 记录关系的所有相关属性(键值对形式)

            3. **类型化三元组**:
            - 生成由 (头节点类型, 关系类型, 尾节点类型) 组成的元组

            **输出要求**:
            - 使用如下JSON Schema:

            {
            "nodes": [
            {
            "name": "节点名称",
            "type": "节点类型",
            "attributes": {
            "属性名1": "属性值1",
            "属性名2": "属性值2"
            }
            }
            ],
            "relations": [
            {
            "source": "头节点名称",
            "target": "尾节点名称",
            "type": "关系类型",
            "attributes": {
            "关系属性名1": "关系属性值1"
            }
            }
            ],
            "typed_triplets": [
            ["头节点类型", "关系类型", "尾节点类型"]
            ]
            }


            **处理规则**:
            1. 节点类型和关系类型由你根据上下文语义自动创建(如"科学家"/"发明"/"研究所")
            2. 属性字段应包含文本中明确提及或可推导的特征(如数值、时间、状态等)
            3. 对同一实体的不同指代需进行合并(如"特斯拉"和"埃隆·马斯克的公司")

            **示例文本**:
            "爱因斯坦在1905年发表了狭义相对论论文,这篇革命性理论后来被普林斯顿高等研究院深入研究"

            **期望输出**:

            {
            "nodes": [
            {
            "name": "爱因斯坦",
            "type": "物理学家",
            "attributes": {
            "领域": "理论物理"
            }
            },
            {
            "name": "狭义相对论",
            "type": "科学理论",
            "attributes": {
            "发表年份": 1905,
            "重要性": "革命性"
            }
            },
            {
            "name": "普林斯顿高等研究院",
            "type": "科研机构",
            "attributes": {
            "研究领域": "理论科学"
            }
            }
            ],
            "relations": [
            {
            "source": "爱因斯坦",
            "target": "狭义相对论",
            "type": "发表",
            "attributes": {
            "时间": 1905
            }
            },
            {
            "source": "普林斯顿高等研究院",
            "target": "狭义相对论",
            "type": "研究",
            "attributes": {
            "强度描述": "深入"
            }
            }
            ],
            "typed_triplets": [
            ["物理学家", "发表", "科学理论"],
            ["科研机构", "研究", "科学理论"]
            ]
            }

            请处理以下文本:
            {}
            """;

    /** Template for {@link #DOERE_TABLE}: converts each table row into a JSON object. */
    private static final String DOERE_TABLE_PROMPT = """
            你是一个表格数据处理专家,请严格按以下要求从给出的表格中提取数据:

            **处理规则:**
            1. 完全保留原始表头字段名称,不做任何中英文转换或修改
            2. 将每行数据转换为一个独立对象
            3. 所有数值保留原始格式(包括逗号分隔符和小数点)
            4. 表格第一列作为主键字段

            **输出格式:**
            ```json
            {
            "table_data": [
            {
            "[第一列表头]": "[第一列值]",
            "[第二列表头]": "[第二列值]",
            "[第三列表头]": "[第三列值]"
            },
            // 后续行...
            ]
            }
            ```

            **示例表格:**
            | 账龄 | 期末余额 | 年初余额 |
            | --- | --- | --- |
            | 1年以内 | 310,844,201.27 | 337,641,834.84 |
            | 1至2年 | 52,374,904.35 | 15,041,750.36 |

            **期望输出:**

            {
            "table_data": [
            {
            "账龄": "1年以内",
            "期末余额": "310,844,201.27",
            "年初余额": "337,641,834.84"
            },
            {
            "账龄": "1至2年",
            "期末余额": "52,374,904.35",
            "年初余额": "15,041,750.36"
            }
            ]
            }

            请处理以下表格:
            {}
            """;

    /**
     * Template for {@link #CHINESE_TO_ENGLISH}.
     *
     * <p>NOTE(review): the text is identical to the opening line of the table prompt
     * and contains no <code>{}</code> placeholder, so this looks like an unfinished
     * stub rather than a translation prompt. It is left unchanged here; the real
     * wording has to come from the prompt owner.
     */
    private static final String CHINESE_TO_ENGLISH_PROMPT = """
            你是一个表格数据处理专家,请严格按以下要求从给出的表格中提取数据:


            """;

    /**
     * Template for {@link #ERE_TO_INSERT_CYPHER}: turns triplets into Cypher MERGE statements.
     *
     * <p>NOTE(review): in the input example the second triplet carries trailing spaces
     * inside {@code "Company "} and {@code "City "}, while the expected output uses the
     * labels without them. Left unchanged; confirm whether this is intentional.
     */
    private static final String ERE_TO_INSERT_CYPHER_PROMPT = """
            请将以下三元组数据转换为Neo4j的Cypher语句,要求:
            1. **节点**用`(n:Label {name: "Value"})`表示,其中`Label`是实体类型(如`Person`、`Company`);
            2. **关系**用`[r:RELATION_TYPE]`表示,保持与三元组中关系一致;
            3. 如果节点或关系已存在,使用`MERGE`避免重复创建;
            4. 返回完整的Cypher语句,不要解释。

            ### 输入三元组示例
            ```json
            [
            {"source": "人物","sourceType": "Person", "relation": "创始人", "relationType": "FOUNDED","target": "公司","targetType": "Company"},
            {"source": "公司","sourceType": "Company ", "relation": "位于", "relationType": "LOCATED_IN","target": "城市","targetType": "City "}
            ]
            ```

            ### 输出示例

            MERGE (p:Person {name: "人物"})
            MERGE (c:Company {name: "公司"})
            MERGE (city:City {name: "城市"})
            MERGE (p)-[r1:FOUNDED]->(c)
            MERGE (c)-[r2:LOCATED_IN]->(city)

            ### 规则补充
            1. 实体类型映射:
            - "人物" → `Person`
            - "公司" → `Company`
            - "城市" → `City`
            2. 关系类型映射:
            - "创始人" → `FOUNDED`
            - "位于" → `LOCATED_IN`
            3. 属性统一用`name`字段存储实体名称。

            ### 禁止行为
            1. 不要为关系添加属性(除非明确提供);
            2. 不要使用中文标签(如`人物`→`Person`);
            3. 不要省略MERGE的安全约束。
            ### 请转换以下三元组:
            {}
            """;

    /**
     * Prompt templates keyed by the public key constants of this class.
     *
     * <p>The map is a plain mutable {@link HashMap}, kept that way so existing callers
     * that add or replace entries keep working. It is filled once by the static
     * initializer below.
     */
    public static final Map<String, String> promptMap = new HashMap<>();

    static {
        init();
    }

    /** Registers every built-in prompt template under its key. */
    private static void init() {
        promptMap.put(DOERE_TEXT, DOERE_TEXT_PROMPT);
        promptMap.put(DOERE_TABLE, DOERE_TABLE_PROMPT);
        promptMap.put(CHINESE_TO_ENGLISH, CHINESE_TO_ENGLISH_PROMPT);
        promptMap.put(ERE_TO_INSERT_CYPHER, ERE_TO_INSERT_CYPHER_PROMPT);
    }
}
|