You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

759 lines
30 KiB
Java

2 months ago
package com.supervision.pdfqaserver.cache;
import java.util.HashMap;
import java.util.Map;
/**
 * Static registry of prompt templates, looked up by string key.
 *
 * <p>Every public {@code String} constant in this class is a lookup key; {@link #promptMap}
 * associates each key with the private {@code *_PROMPT} text block of the same stem. The
 * templates contain placeholders (a bare pair of braces, or a named one such as
 * {@code {text}}) that this class never fills in.
 * NOTE(review): substitution is done by callers that are not visible here; confirm which
 * formatter each placeholder style expects before editing a template.
 *
 * <p>{@code promptMap} is a plain, public {@link HashMap}: any caller can modify it and it is
 * not synchronized. NOTE(review): whether other classes write to it cannot be determined from
 * this file, so it is intentionally left mutable.
 */
public class PromptCache {

    /** Key for the template that shows a {@code nodes / relations / typed_triplets} JSON layout. */
    public static final String DOERE_TEXT = "DOERE_TEXT";

    /** Key for the template that shows a {@code table_data} JSON layout built from a Markdown table. */
    public static final String DOERE_TABLE = "DOERE_TABLE";

    /** Key for the template that asks for a {@code cypherQueries} JSON array. */
    public static final String TEXT_TO_CYPHER = "TEXT_TO_CYPHER";

    /** Key for the answer template built around {@code {example_text}} and {@code {query}}. */
    public static final String GENERATE_ANSWER = "GENERATE_ANSWER";

    /** Key for the template with Chinese-to-English naming examples (e.g. {@code UserOrder}). */
    public static final String CHINESE_TO_ENGLISH = "CHINESE_TO_ENGLISH";

    /** Key for the template whose example output is a list of Cypher {@code MERGE} statements. */
    public static final String ERE_TO_INSERT_CYPHER = "ERE_TO_INSERT_CYPHER";

    /** Key for the table classification template. */
    public static final String CLASSIFY_TABLE = "CLASSIFY_TABLE";

    /** Key for the table title extraction template. */
    public static final String EXTRACT_TABLE_TITLE = "EXTRACT_TABLE_TITLE";

    /** Key for the template that classifies PDF text into a {@code ContentType} of 0, 1 or 2. */
    public static final String CLASSIFY_CONTENT_TYPE = "CLASSIFY_CONTENT_TYPE";

    /** Key for the template that picks an {@code industryCategory} for a text. */
    public static final String CLASSIFY_INDUSTRY = "CLASSIFY_INDUSTRY";

    /** Key for the template that matches a text against the supplied {@code {IntentType}} values. */
    public static final String CLASSIFY_INTENT = "CLASSIFY_INTENT";

    /**
     * Key for the intent template that takes only {@code {text}} (no candidate intent list).
     * NOTE(review): the "TRAIN" suffix suggests a training-time variant; confirm with callers.
     */
    public static final String CLASSIFY_INTENT_TRAIN = "CLASSIFY_INTENT_TRAIN";

    /** Key for the template that returns source / relation / target / intent metadata objects. */
    public static final String EXTRACT_INTENT_METADATA = "EXTRACT_INTENT_METADATA";

    /** Key for the template that extracts nodes and relations guided by {@code {domainMetadata}}. */
    public static final String EXTRACT_ERE_BASE_INTENT = "EXTRACT_ERE_BASE_INTENT";

    /**
     * Template registered under {@link #DOERE_TEXT}. Shows the expected JSON layout
     * ({@code nodes}, {@code relations}, {@code typed_triplets}) followed by one worked
     * example, and ends with a bare-braces placeholder for the input.
     */
    private static final String DOERE_TEXT_PROMPT = """
        JSON
        1. ****
        -
        -
        -
        2. ****
        -
        -
        -
        3. ****
        - (, , )
        ****
        - JSON使```json ```Markdown
        - 使JSON Schema
        {
        "nodes": [
        {
        "name": "节点名称",
        "type": "节点类型",
        "attributes": {
        "属性名1": "属性值1",
        "属性名2": "属性值2"
        }
        }
        ],
        "relations": [
        {
        "source": "头节点名称",
        "target": "尾节点名称",
        "type": "关系类型",
        "attributes": {
        "关系属性名1": "关系属性值1"
        }
        }
        ],
        "typed_triplets": [
        ["头节点类型", "关系类型", "尾节点类型"]
        ]
        }
        ****
        1. "科学家"/"发明"/"研究所"
        2.
        3. "特斯拉""埃隆·马斯克的公司"
        ****
        "爱因斯坦在1905年发表了狭义相对论论文这篇革命性理论后来被普林斯顿高等研究院深入研究"
        ****
        {
        "nodes": [
        {
        "name": "爱因斯坦",
        "type": "物理学家",
        "attributes": {
        "领域": "理论物理"
        }
        },
        {
        "name": "狭义相对论",
        "type": "科学理论",
        "attributes": {
        "发表年份": 1905,
        "重要性": "革命性"
        }
        },
        {
        "name": "普林斯顿高等研究院",
        "type": "科研机构",
        "attributes": {
        "研究领域": "理论科学"
        }
        }
        ],
        "relations": [
        {
        "source": "爱因斯坦",
        "target": "狭义相对论",
        "type": "发表",
        "attributes": {
        "时间": 1905
        }
        },
        {
        "source": "普林斯顿高等研究院",
        "target": "狭义相对论",
        "type": "研究",
        "attributes": {
        "强度描述": "深入"
        }
        }
        ],
        "typed_triplets": [
        ["物理学家", "发表", "科学理论"],
        ["科研机构", "研究", "科学理论"]
        ]
        }
        {}
        """;

    /**
     * Template registered under {@link #DOERE_TABLE}. Shows a {@code table_data} JSON layout
     * and a Markdown-table example, and ends with a bare-braces placeholder for the input.
     */
    private static final String DOERE_TABLE_PROMPT = """
        ****
        1.
        2.
        3.
        4.
        5.
        ****
        {
        "table_data": [
        {
        "[第一列表头]": "[第一列值]",
        "[第二列表头]": "[第二列值]",
        "[第三列表头]": "[第三列值]"
        },
        // 后续行...
        ]
        }
        ****
        | | | |
        | --- | --- | --- |
        | 1 | 310,844,201.27 | 337,641,834.84 |
        | 12 | 52,374,904.35 | 15,041,750.36 |
        ****
        {
        "table_data": [
        {
        "账龄": "1年以内",
        "期末余额": "310,844,201.27",
        "年初余额": "337,641,834.84"
        },
        {
        "账龄": "1至2年",
        "期末余额": "52,374,904.35",
        "年初余额": "15,041,750.36"
        }
        ]
        }
        {}
        """;

    /**
     * Template registered under {@link #TEXT_TO_CYPHER}. Named placeholders:
     * {@code {relationTypeList}}, {@code {sourceTypeList}}, {@code {targetTypeList}} and
     * {@code {query}}. Braces that belong to the JSON examples are written with a leading
     * backslash in the resulting string. NOTE(review): presumably so the template engine does
     * not treat them as placeholders; confirm against the caller.
     */
    private static final String TEXT_TO_CYPHER_PROMPT = """
        Neo4j Cypher
        ---
        ****
        - **relationType**
        {relationTypeList}
        - **sourceType**
        {sourceTypeList}
        - **targetType**
        {targetTypeList}
        ---
        ****
        1. `Cypher `
        2. 使 `WHERE`
        3. 使 c r t
        4. JSON \\{ "cypherQueries": [ "MATCH ... RETURN c, r, t", ... ] \\}
        5. relationTypesourceType targetType
        "无法根据数据库结构生成查询"
        6. 使relationTypesourceType targetType
        ---
        ****
        1. - ****
        - ** Cypher **
        "\\{
        "cypherQueries": [
        "MATCH (c:`公司`)-[r:`收购`]->(t:`公司`) RETURN c, r, t",
        "MATCH (c:`上市公司`)-[r:`收购`]->(t:`公司`) RETURN c, r, t",
        "MATCH (c:`公司`)-[r:`收购`]->(t:`上市公司`) RETURN c, r, t",
        "MATCH (c:`电力公司`)-[r:`收购`]->(t:`国有企业`) RETURN c, r, t",
        .....
        ]
        \\}"
        2. - ****
        - ** Cypher **
        "\\{
        "cypherQueries": [
        "MATCH (c:`公司`)-[r:`包含`]->(t:`报告`) RETURN c, r, t",
        .....
        ]
        \\}"
        {query}
        Cypher
        """;

    /**
     * Template registered under {@link #GENERATE_ANSWER}. Named placeholders:
     * {@code {example_text}} and {@code {query}}. The last line is the literal
     * {@code /no_think}.
     */
    private static final String GENERATE_ANSWER_PROMPT = """
        {example_text}
        {query}
        1. 使
        2.
        "抱歉目前知识库没有这个问题的答案."
        3.
        /no_think
        """;

    /**
     * Template registered under {@link #CHINESE_TO_ENGLISH}. Lists naming examples
     * ({@code UserOrder}, {@code BELONGS_TO_2023}, ...) and carries a bare-braces placeholder
     * for the input under item 4.
     */
    private static final String CHINESE_TO_ENGLISH_PROMPT = """
        Neo4jNeo4j
        1. ****
        - 使`UpperCamelCase``ProductCategory`
        -
        -
        2. ****
        -
        -
        - /"腾讯" Tencent
        3. ****
        - : "用户订单" : UserOrder
        - : "属于2023年" : BELONGS_TO_2023
        - : "5G网络设备" : 5GNetworkDevice
        - : "评分大于90" : SCORE_ABOVE_90
        4. ****
        {}
        5. ****
        - 使``````Markdown
        -
        """;

    /**
     * Template registered under {@link #ERE_TO_INSERT_CYPHER}. Contains one example that turns
     * a JSON array of source / relation / target records into Cypher {@code MERGE} statements,
     * and ends with a bare-braces placeholder for the input.
     */
    private static final String ERE_TO_INSERT_CYPHER_PROMPT = """
        Neo4jCypher
        1. ****`(n:Label {name: "Value"})``Label``Person``Company`
        2. ****`[r:RELATION_TYPE]`
        3. 使`MERGE`
        4. Cypher
        ###
        [
        {"source": "人物","sourceType": "Person", "relation": "创始人", "relationType": "FOUNDED","target": "公司","targetType": "Company"},
        {"source": "公司","sourceType": "Company ", "relation": "位于", "relationType": "LOCATED_IN","target": "城市","targetType": "City "}
        ]
        ###
        MERGE (p:Person {name: "人物"})
        MERGE (c:Company {name: "公司"})
        MERGE (city:City {name: "城市"})
        MERGE (p)-[r1:FOUNDED]->(c)
        MERGE (c)-[r2:LOCATED_IN]->(city)
        ###
        1.
        - "人物" `Person`
        - "公司" `Company`
        - "城市" `City`
        2.
        - "创始人" `FOUNDED`
        - "位于" `LOCATED_IN`
        3. `name`
        ###
        1.
        2. 使```Person`
        3. MERGE
        ###
        {}
        """;

    /**
     * Template registered under {@link #CLASSIFY_TABLE}. Describes two table categories, each
     * with a fenced Markdown-table sample, and ends with a bare-braces placeholder for the
     * input.
     */
    private static final String CLASSIFY_TABLE_PROMPT = """
        ****
        1. ****
        - ****
        - ****
        - 2****"关键审计事项""审计应对"
        -
        ```
        | | |
        |---------------------------|-----------------------------|
        | ... | ...... |
        ```
        2. ****
        - ****
        - ****
        - 6****"2023年12月31日""附注"
        -
        ```
        | | | 20231231 | 202311 |
        |--------------|------|---------------------|-------------------|
        | | .1 | 4,879,272,436.13 | 20,493,232,077.05 |
        ```
        ****
        - ********
        -
        ****
        | | |
        | --- | --- |
        | 49 2023376\\.42亿 5\\.57% | 1 |
        ****
        {}
        """;

    /**
     * Template registered under {@link #EXTRACT_TABLE_TITLE}. Ends with a bare-braces
     * placeholder for the input.
     */
    private static final String EXTRACT_TABLE_TITLE_PROMPT = """
        ****
        -
        ****
        -
        ****
        -
        ****
        {}
        """;

    /**
     * Template registered under {@link #CLASSIFY_CONTENT_TYPE}. Named placeholders:
     * {@code {ContentType}} and {@code {text}}. The body also contains literal JSON braces
     * (including empty-object examples) that are part of the instructions, not placeholders.
     */
    private static final String CLASSIFY_CONTENT_TYPE_PROMPT = """
        # PDF
        ##
        `ContentType`PDFJSON
        ##
        {ContentType}
        ##
        PDF:
        {text}
        ##
        1. ** `ContentType` **
        - `ContentType`
        - `0`
        - `1`
        - `2`
        2. ****
        - `ContentType` `{"ContentType": }`
        - `{}`
        3. ****
        ```json
        {
        "0": "研报类型(行业分析、财务数据)",
        "1": "对话类型(会议记录、问答交流)",
        "2": "记录类型(操作日志、事务记录)"
        }
        ```
        ##
        ```json
        // 示例1指定类型0文本符合研报特征
        {
        "text": "2023年新能源汽车渗透率达35%乘联会预计2024年突破50%"
        }
        {"ContentType": 0}
        // 示例2指定类型1文本不符合对话特征
        {
        "text": "系统启动执行数据同步"
        }
        {}
        // 示例3指定类型2文本符合记录特征
        {
        "text": "2023-10-01 14:00 用户登录异常14:05 触发安全警报"
        }
        {"ContentType": 2}
        ```
        ---
        ****
        - `ContentType`
        -
        -
        ##
        1. JSON
        2.
        - `{"ContentType": 0/1/2}`
        - `{}`
        ./no_think
        """;

    /**
     * Template registered under {@link #CLASSIFY_INDUSTRY}. Named placeholders:
     * {@code {text}} and {@code {industryCategory}}.
     */
    private static final String CLASSIFY_INDUSTRY_PROMPT = """
        ###
        ###
        ```
        {text}
        ```
        ###
        {industryCategory}
        ###
        * ****
        * ****
        *
        * JSON使```json ```Markdown
        ###
        ```
        {
        industryCategory:
        }
        ```
        """;

    /**
     * Template registered under {@link #CLASSIFY_INTENT}. Named placeholders:
     * {@code {IntentType}} and {@code {text}}; the examples return an {@code IntentTypeList}
     * array or an empty object.
     * NOTE(review): the json code fence opened before the examples is never closed in this
     * template; confirm whether that is intentional.
     */
    private static final String CLASSIFY_INTENT_PROMPT = """
        #
        ##
        ##
        {IntentType}
        ##
        1.
        2.
        3.
        ##
        {text}
        ##
        ```json
        // 示例1匹配单个意图
        {
        "text": "本公司注册地址为上海市浦东新区张江高科技园区"
        }
        {
        "IntentTypeList": ["公司地址"]
        }
        // 示例2匹配多个意图
        {
        "text": "2023年度财务报告显示公司总部位于北京全年营收..."
        }
        {
        "IntentTypeList": ["公司地址", "公司年度报告"]
        }
        // 示例3无匹配意图
        {
        "text": "今天的天气很适合户外活动"
        }
        {}
        """;

    /**
     * Template registered under {@link #CLASSIFY_INTENT_TRAIN}. Single named placeholder:
     * {@code {text}}; the examples return an {@code IntentTypeList} array or an empty object.
     */
    private static final String CLASSIFY_INTENT_TRAIN_PROMPT = """
        #
        ##
        PDF
        ##
        {text}
        ##
        ```json
        // 示例1
        {
        "text": "..."
        }
        {
        "IntentTypeList": ["...", "..."]
        }
        // 示例2文本意图无法识别
        {
        "text": "人生短短几个球"
        }
        {}
        ```
        ##
        1. JSON
        2. JSON使```json ```Markdown
        3.
        ```json
        {"IntentTypeList": ["...", "..."]}
        ```
        -
        ```json
        {}
        ```
        3.使......
        ./no_think
        """;

    /**
     * Template registered under {@link #EXTRACT_INTENT_METADATA}. Named placeholders:
     * {@code {text}} and {@code {IntentTypeList}}. The sample output is an array of objects
     * with {@code source}, {@code relation}, {@code target} (each holding {@code type} and
     * {@code attributes}) and {@code intent}.
     */
    private static final String EXTRACT_INTENT_METADATA_PROMPT = """
        #
        ##
        JSON
        ##
        -
        {text}
        -
        {IntentTypeList}
        ##
        1.
        2.
        - source
        - relation
        - target
        - intent
        3. /
        - type
        - attributes
        4. 使
        ```json
        [
        {
        "source": {
        "type": "实体类型1",
        "attributes": ["属性1", "属性2"]
        },
        "relation": {
        "type": "关系类型",
        "attributes": []
        },
        "target": {
        "type": "实体类型2",
        "attributes": ["属性3"]
        },
        "intent": "匹配的意图标签"
        }
        ]
        5.
        """;

    /**
     * Template registered under {@link #EXTRACT_ERE_BASE_INTENT}. Named placeholders:
     * {@code {text}} and {@code {domainMetadata}}. The sample output uses the same
     * {@code nodes / relations / typed_triplets} layout as the text extraction template.
     * NOTE(review): in the sample, the attributes of the second relation are missing the
     * comma between the two entries and the colon inside the second one, so the sample is not
     * valid JSON as written. Left untouched; verify against the authoritative copy.
     */
    private static final String EXTRACT_ERE_BASE_INTENT_PROMPT = """
        #
        ##
        ##
        - {text}
        -
        {domainMetadata}
        ##
        {
        "nodes": [
        {
        "type": "公司",
        "attributes": {
        "名称": "龙源(酒泉)风力发电有限公司",
        "地址": "雨花台区"
        }
        },
        {
        "type": "电子银行承兑汇票",
        "attributes": {
        "金额": "100.00万元",
        "打印时间": "2024年10月20号"
        }
        },
        {
        "type": "公司",
        "attributes": {
        "名称": "杭州六小龙",
        "地址": "杭州高新区"
        }
        }
        ],
        "relations": [
        {
        "type": "持有",
        "attributes": {
        }
        },
        {
        "type": "收购",
        "attributes": {
        "收购类型": "全资收购"
        "收购时间""2025年5月28号"
        }
        }
        ],
        "typed_triplets": [
        [
        "公司",
        "持有",
        "电子银行承兑汇票"
        ],
        [
        "公司",
        "收购",
        "公司"
        ]
        ]
        }
        ##
        - `domainMetadata`
        -
        -
        - JSON使```json ```Markdown
        """;

    /**
     * Key-to-template lookup table, filled once by the static initializer below.
     *
     * <p>Declared after the templates on purpose: static fields and initializers run in
     * textual order, so the registration stays correct even if a template is later changed
     * from a compile-time constant into a computed value.
     */
    public static final Map<String, String> promptMap = new HashMap<>();

    static {
        // One entry per public key constant; 14 in total.
        promptMap.put(DOERE_TEXT, DOERE_TEXT_PROMPT);
        promptMap.put(DOERE_TABLE, DOERE_TABLE_PROMPT);
        promptMap.put(CHINESE_TO_ENGLISH, CHINESE_TO_ENGLISH_PROMPT);
        promptMap.put(ERE_TO_INSERT_CYPHER, ERE_TO_INSERT_CYPHER_PROMPT);
        promptMap.put(TEXT_TO_CYPHER, TEXT_TO_CYPHER_PROMPT);
        promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT);
        promptMap.put(CLASSIFY_TABLE, CLASSIFY_TABLE_PROMPT);
        promptMap.put(EXTRACT_TABLE_TITLE, EXTRACT_TABLE_TITLE_PROMPT);
        promptMap.put(CLASSIFY_CONTENT_TYPE, CLASSIFY_CONTENT_TYPE_PROMPT);
        promptMap.put(CLASSIFY_INDUSTRY, CLASSIFY_INDUSTRY_PROMPT);
        promptMap.put(CLASSIFY_INTENT, CLASSIFY_INTENT_PROMPT);
        promptMap.put(CLASSIFY_INTENT_TRAIN, CLASSIFY_INTENT_TRAIN_PROMPT);
        promptMap.put(EXTRACT_INTENT_METADATA, EXTRACT_INTENT_METADATA_PROMPT);
        promptMap.put(EXTRACT_ERE_BASE_INTENT, EXTRACT_ERE_BASE_INTENT_PROMPT);
    }
}