generateGraph功能优化

master
xueqingkun 1 month ago
parent c72724f00b
commit 72b87cfd3f

@ -16,6 +16,9 @@ public class PromptCache {
public static final String CHINESE_TO_ENGLISH = "CHINESE_TO_ENGLISH"; public static final String CHINESE_TO_ENGLISH = "CHINESE_TO_ENGLISH";
public static final String ERE_TO_INSERT_CYPHER = "ERE_TO_INSERT_CYPHER"; public static final String ERE_TO_INSERT_CYPHER = "ERE_TO_INSERT_CYPHER";
public static final String CLASSIFY_TABLE = "CLASSIFY_TABLE";
public static final Map<String, String> promptMap = new HashMap<>(); public static final Map<String, String> promptMap = new HashMap<>();
static { static {
@ -28,6 +31,7 @@ public class PromptCache {
promptMap.put(ERE_TO_INSERT_CYPHER, ERE_TO_INSERT_CYPHER_PROMPT); promptMap.put(ERE_TO_INSERT_CYPHER, ERE_TO_INSERT_CYPHER_PROMPT);
promptMap.put(TEXT_TO_CYPHER, TEXT_TO_CYPHER_PROMPT); promptMap.put(TEXT_TO_CYPHER, TEXT_TO_CYPHER_PROMPT);
promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT); promptMap.put(GENERATE_ANSWER, GENERATE_ANSWER_PROMPT);
promptMap.put(CLASSIFY_TABLE, CLASSIFY_TABLE_PROMPT);
} }
@ -327,4 +331,46 @@ public class PromptCache {
### ###
{} {}
"""; """;
/**
 * Prompt template used to classify a table.
 *
 * <p>Registered in {@code promptMap} under the {@code CLASSIFY_TABLE} key. The template contains
 * two numbered category descriptions, each followed by a fenced example table, then a worked
 * example, and ends with a single {@code {}} placeholder that the caller fills with the table
 * content to classify.
 *
 * <p>NOTE(review): the visible consumer converts the model reply with
 * {@code BooleanUtil.toBooleanObject}, so the prompt is presumably expected to elicit a bare
 * boolean-like answer — confirm against the full prompt text.
 */
private static final String CLASSIFY_TABLE_PROMPT = """
****
1. ****
- ****
- ****
- 2****"关键审计事项""审计应对"
-
```
| | |
|---------------------------|-----------------------------|
| ... | ...... |
```
2. ****
- ****
- ****
- 6****"2023年12月31日""附注"
-
```
| | | 20231231 | 202311 |
|--------------|------|---------------------|-------------------|
| | .1 | 4,879,272,436.13 | 20,493,232,077.05 |
```
****
- ********
-
****
| | |
| --- | --- |
| 49 2023376\\.42亿 5\\.57% | 1 |
****
{}
""";
} }

@ -0,0 +1,18 @@
package com.supervision.pdfqaserver.dto;
import lombok.Data;
/**
 * Data holder for the header information of a table: its title, who prepared it, when it was
 * prepared, and the unit in which amounts are expressed.
 *
 * <p>All fields are plain strings; accessors, {@code equals}/{@code hashCode} and
 * {@code toString} are generated by Lombok's {@code @Data}.
 */
@Data
public class TableTitleDTO {
    // Table title.
    private String title;
    // Prepared by (the party that compiled the table).
    private String createdBy;
    // Preparation time (kept as raw text, not parsed into a date type).
    private String createdByTime;
    // Unit of the monetary amounts in the table.
    private String amountUnit;
}

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver.service;
import com.supervision.pdfqaserver.dto.EREDTO; import com.supervision.pdfqaserver.dto.EREDTO;
import com.supervision.pdfqaserver.dto.DocumentDTO; import com.supervision.pdfqaserver.dto.DocumentDTO;
import com.supervision.pdfqaserver.dto.TableTitleDTO;
import com.supervision.pdfqaserver.dto.TruncateDTO; import com.supervision.pdfqaserver.dto.TruncateDTO;
import java.util.List; import java.util.List;
@ -26,6 +27,21 @@ public interface TripleConversionPipeline {
*/ */
EREDTO doEre(TruncateDTO truncateDTO); EREDTO doEre(TruncateDTO truncateDTO);
/**
 * Classifies table content as descriptive or not.
 *
 * <p>In the implementation visible in this change, {@code doEre} sends a table through the
 * text extraction path when this returns {@code true} and through the table extraction path
 * when it returns {@code false}; a {@code null} result is logged and the chunk is skipped.
 *
 * @param content table content to classify; the visible implementation asserts it is not empty
 * @return {@code true} if the table is judged to be of the descriptive type, {@code false}
 *         otherwise; may be {@code null} when no boolean can be derived from the model reply
 */
Boolean classify(String content);
/**
 * Extracts table title information from the given content.
 *
 * <p>NOTE(review): the implementation visible in this change is a stub that always returns
 * {@code null}, so callers must tolerate a {@code null} result until it is implemented.
 *
 * @param content source content — presumably the table text; TODO confirm the expected format
 * @return the extracted {@link TableTitleDTO}; may be {@code null}
 */
TableTitleDTO extractTableTitle(String content);
/** /**
* *
* @param eredtoList * @param eredtoList

@ -67,7 +67,12 @@ public class KnowledgeGraphServiceImpl implements KnowledgeGraphService {
log.info("开始命名实体识别..."); log.info("开始命名实体识别...");
List<EREDTO> eredtoList = new ArrayList<>(); List<EREDTO> eredtoList = new ArrayList<>();
for (TruncateDTO truncateDTO : truncateDTOS) { for (TruncateDTO truncateDTO : truncateDTOS) {
EREDTO eredto = tripleConversionPipeline.doEre(truncateDTO); EREDTO eredto = null;
try {
eredto = tripleConversionPipeline.doEre(truncateDTO);
} catch (Exception e) {
log.error("命名实体识别失败,切分文档id:{}", truncateDTO.getId(), e);
}
if (null == eredto){ if (null == eredto){
continue; continue;
} }

@ -1,6 +1,8 @@
package com.supervision.pdfqaserver.service.impl; package com.supervision.pdfqaserver.service.impl;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.BooleanUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.supervision.pdfqaserver.cache.PromptCache; import com.supervision.pdfqaserver.cache.PromptCache;
import com.supervision.pdfqaserver.constant.LayoutTypeEnum; import com.supervision.pdfqaserver.constant.LayoutTypeEnum;
@ -74,22 +76,48 @@ public class TripleConversionPipelineImpl implements TripleConversionPipeline {
public EREDTO doEre(TruncateDTO truncateDTO) { public EREDTO doEre(TruncateDTO truncateDTO) {
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TEXT.getCode()))){ if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TEXT.getCode()))){
return doTextEre(truncateDTO);
}
try { if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){
// 先分析表格是否是描述类型
Boolean classify = this.classify(truncateDTO.getContent());
if (null == classify){
log.info("doEre:表格分类结果为空,切分文档id:{}", truncateDTO.getId());
return null;
}
if (classify){
return doTextEre(truncateDTO); return doTextEre(truncateDTO);
} catch (Exception e) {
log.error("doEre:文本实体关系抽取失败,内容:{}", truncateDTO.getContent(), e);
} }
return doTableEre(truncateDTO);
} }
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType());
return null;
}
if (StrUtil.equals(truncateDTO.getLayoutType(),String.valueOf(LayoutTypeEnum.TABLE.getCode()))){ @Override
try { public Boolean classify(String content) {
return doTableEre(truncateDTO); Assert.notEmpty(content, "内容不能为空");
} catch (Exception e) { // 对表格内容进行精简,只获取与前四行相关的内容
log.error("doEre:表格实体关系抽取失败,内容:{}", truncateDTO.getContent(), e); String[] lines = content.split("\n");
if (lines.length > 5){
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 5; i++) {
sb.append(lines[i]).append("\n");
} }
content = sb.toString();
} }
log.info("doEre:错误的布局类型: {}", truncateDTO.getLayoutType()); log.info("classify:开始进行实体关系分类,内容:{}", content);
String prompt = PromptCache.promptMap.get(PromptCache.CLASSIFY_TABLE);
String format = StrUtil.format(prompt, content);
String response = ollamaChatModel.call(format);
log.info("classify响应结果:{}", response);
return BooleanUtil.toBooleanObject(response);
}
@Override
public TableTitleDTO extractTableTitle(String content) {
return null; return null;
} }

@ -25,4 +25,10 @@ neo4j:
driver: driver:
uri: bolt://192.168.10.137:17687 uri: bolt://192.168.10.137:17687
user: neo4j user: neo4j
password: 12345678 password: 12345678
graph:
generate:
thread-pool:
core: 2
max: 4

@ -2,6 +2,7 @@ package com.supervision.pdfqaserver;
import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator; import com.supervision.pdfqaserver.service.ChinesEsToEnglishGenerator;
import com.supervision.pdfqaserver.service.KnowledgeGraphService; import com.supervision.pdfqaserver.service.KnowledgeGraphService;
import com.supervision.pdfqaserver.service.TripleConversionPipeline;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.neo4j.driver.*; import org.neo4j.driver.*;
@ -89,5 +90,19 @@ class PdfQaServerApplicationTests {
System.out.println("翻译结果: " + english); System.out.println("翻译结果: " + english);
} }
@Autowired
TripleConversionPipeline tripleConversionPipeline;
/**
 * Manual smoke test for {@code TripleConversionPipeline.classify}: passes a small markdown
 * table with numeric amount columns to the pipeline and prints the returned classification.
 *
 * <p>NOTE(review): the method name refers to ChinesEsToEnglishGenerator, but the body only
 * exercises {@code classify}; consider renaming. The test makes no assertion — the result is
 * only printed, so it cannot fail on a wrong classification.
 */
@Test
void testChinesEsToEnglishGenerator2() {
// Sample input: a markdown table (header row, separator row, two data rows).
String s = """
| | <br> | | | | 20231231 | 202311 |
|------------------------------------------|---------|--|------|----|--------------------|--------------------|
| : | | | | | | |
| | | | 1 | | 4,879,272,436.13 | 20,493,232,077.05 |
""";
// The result may be null as well as true/false, hence the boxed Boolean.
Boolean classify = tripleConversionPipeline.classify(s);
System.out.println(classify);
}
} }

Loading…
Cancel
Save