1. 指代消除功能开发

master
xueqingkun 3 months ago
parent e014b70574
commit 0e0cdc4a50

@ -1,5 +1,6 @@
package com.supervision.police.domain;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.annotation.FieldFill;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
@ -53,6 +54,16 @@ public class NoteRecordSplit implements Serializable {
*/
private String answer;
/**
 * Question text after pronoun replacement. When this is empty,
 * {@code getQuestionIfProcessedAbsent()} falls back to {@code question}.
 */
private String processedQuestion;
/**
 * Answer text after pronoun replacement. When this is empty,
 * {@code getAnswerIfProcessedAbsent()} falls back to {@code answer}.
 */
private String processedAnswer;
/**
* ()
*/
@ -90,4 +101,11 @@ public class NoteRecordSplit implements Serializable {
@TableField(exist = false)
private static final long serialVersionUID = 1L;
/**
 * Picks the answer text to use for downstream processing.
 *
 * @return {@code processedAnswer} when it is non-empty, otherwise the raw {@code answer}
 */
public String getAnswerIfProcessedAbsent() {
    if (StrUtil.isEmpty(this.processedAnswer)) {
        return this.answer;
    }
    return this.processedAnswer;
}
/**
 * Picks the question text to use for downstream processing.
 *
 * @return {@code processedQuestion} when it is non-empty, otherwise the raw {@code question}
 */
public String getQuestionIfProcessedAbsent() {
    if (StrUtil.isEmpty(this.processedQuestion)) {
        return this.question;
    }
    return this.processedQuestion;
}
}

@ -35,4 +35,10 @@ public interface NoteRecordSplitService extends IService<NoteRecordSplit> {
List<NoteRecordSplit> queryRecordSplitList(String recordId);
/**
 * Persists the pronoun-replaced text ({@code processedQuestion} and
 * {@code processedAnswer}) of the given record fragments.
 *
 * @param splitList fragments whose processed question/answer should be stored
 */
void batchUpdatePronoun(List<NoteRecordSplit> splitList);
}

@ -1,7 +1,7 @@
package com.supervision.police.service;
import com.supervision.police.domain.ModelRecordType;
import com.supervision.police.domain.NoteRecordSplit;
import com.supervision.thread.ReplacePronounTask;
import java.util.List;
@ -18,4 +18,22 @@ public interface RecordSplitProcessService {
*/
boolean recordProcessTaskStatusCheck(String caseId, String recordId, Integer splitSize);
/**
 * Runs pronoun replacement over the given record fragments.
 *
 * @param splitList record fragments, in record order
 * @return the fragments for which a replacement task was run and completed;
 *         note: fragments that were skipped, or whose task failed, are not included
 * @throws InterruptedException if the calling thread is interrupted while waiting for the tasks
 */
List<NoteRecordSplit> replacePronoun(List<NoteRecordSplit> splitList) throws InterruptedException;
/**
 * Builds the pronoun-replacement task for one fragment, using the fragments
 * that precede it in the list as context.
 *
 * @param splitList all record fragments, in record order
 * @param index position in {@code splitList} of the fragment to process
 * @return the task, or {@code null} when no task should be run for that fragment
 */
ReplacePronounTask buildReplacePronounTask(List<NoteRecordSplit> splitList, int index);
}

@ -109,6 +109,13 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl<NoteRecordSplitMappe
boolean taskStatus = recordSplitProcessService.recordProcessTaskStatusCheck(record.getCaseId(), record.getId(), allNoteRecordSplits.size());
// 如果校验结果为true,则说明需要进行分类以及三元组操作
if (taskStatus) {
// 对笔录进行指代消岐
try {
List<NoteRecordSplit> noteRecordSplits = recordSplitProcessService.replacePronoun(allNoteRecordSplits);
noteRecordSplitService.batchUpdatePronoun(noteRecordSplits);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
// 对笔录进行分类,并对笔录进行提取三元组
List<ModelRecordType> allTypeList = modelRecordTypeService.lambdaQuery().list();
recordSplitProcessService.process(allTypeList, allNoteRecordSplits);
@ -376,6 +383,21 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl<NoteRecordSplitMappe
return super.lambdaQuery().eq(NoteRecordSplit::getNoteRecordId, recordId).list();
}
/**
 * Stores the pronoun-replaced question and answer of each fragment.
 * <p>
 * One update is issued per fragment, keyed by the fragment id. Fragments
 * without an id, or whose raw question and answer are both empty, are skipped.
 * A {@code null} or empty list, and {@code null} elements, are ignored.
 *
 * @param splitList fragments carrying the processed text to persist
 */
@Override
public void batchUpdatePronoun(List<NoteRecordSplit> splitList) {
    if (splitList == null || splitList.isEmpty()) {
        return;
    }
    for (NoteRecordSplit split : splitList) {
        // Without an id there is no row to address.
        if (split == null || StrUtil.isEmpty(split.getId())) {
            continue;
        }
        if (StrUtil.isEmpty(split.getAnswer()) && StrUtil.isEmpty(split.getQuestion())) {
            continue;
        }
        super.lambdaUpdate()
                .set(NoteRecordSplit::getProcessedQuestion, split.getProcessedQuestion())
                .set(NoteRecordSplit::getProcessedAnswer, split.getProcessedAnswer())
                .eq(NoteRecordSplit::getId, split.getId())
                .update();
    }
}
private void setRecordDetailSerialNumber(List<NoteRecordDetailDTO> noteRecordDetailDTOList) {
List<Pair<String, Integer>> pairs = new ArrayList<>();

@ -1,18 +1,24 @@
package com.supervision.police.service.impl;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.supervision.police.domain.CaseTaskRecord;
import com.supervision.police.domain.ModelRecordType;
import com.supervision.police.domain.NoteRecordSplit;
import com.supervision.police.service.CaseTaskRecordService;
import com.supervision.police.service.RecordSplitProcessService;
import com.supervision.police.service.RecordSplitClassifyService;
import com.supervision.thread.ReplacePronounTask;
import com.supervision.thread.ReplacePronounTreadPool;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.ollama.OllamaChatClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.time.LocalDateTime;
import java.util.List;
import java.util.Optional;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
@Slf4j
@Service
@ -23,6 +29,10 @@ public class RecordSplitProcessServiceImpl implements RecordSplitProcessService
private final CaseTaskRecordService caseTaskRecordService;
private final OllamaChatClient chatClient;
@Value("${fu-hsi-config.pronouns}")
private List<String> pronouns;
@Override
public void process(List<ModelRecordType> allTypeList, List<NoteRecordSplit> splitList) {
// 通过异步的形式提交分类
@ -101,4 +111,78 @@ public class RecordSplitProcessServiceImpl implements RecordSplitProcessService
return true;
}
/**
 * Runs pronoun replacement for every fragment that has both a question and an
 * answer and mentions at least one configured pronoun.
 * <p>
 * Tasks are submitted together to the shared replace-pronoun executor and this
 * method blocks until all of them have finished. A task that fails is logged
 * and its fragment is left out of the result.
 *
 * @param splitList record fragments, in record order; {@code null} or empty yields an empty result
 * @return the fragments returned by the tasks that completed
 * @throws InterruptedException if interrupted while waiting for the tasks to finish
 */
@Override
public List<NoteRecordSplit> replacePronoun(List<NoteRecordSplit> splitList) throws InterruptedException {
    List<NoteRecordSplit> result = new ArrayList<>();
    if (splitList == null || splitList.isEmpty()) {
        return result;
    }

    List<ReplacePronounTask> tasks = new ArrayList<>();
    for (int i = 0; i < splitList.size(); i++) {
        NoteRecordSplit current = splitList.get(i);
        if (current == null) {
            continue;
        }
        String question = current.getQuestion();
        String answer = current.getAnswer();
        if (StrUtil.isEmpty(question) || StrUtil.isEmpty(answer)) {
            continue;
        }
        // Only spend a model call on fragments that actually mention a pronoun.
        if (!containsProcess(question, pronouns) && !containsProcess(answer, pronouns)) {
            continue;
        }
        ReplacePronounTask task = buildReplacePronounTask(splitList, i);
        if (null == task) {
            continue;
        }
        tasks.add(task);
    }

    // Nothing to do: avoid touching the shared executor at all.
    if (tasks.isEmpty()) {
        return result;
    }

    List<Future<NoteRecordSplit>> futures = ReplacePronounTreadPool.executorService.invokeAll(tasks);
    for (Future<NoteRecordSplit> future : futures) {
        try {
            NoteRecordSplit processed = future.get();
            if (Objects.nonNull(processed)) {
                result.add(processed);
            }
        } catch (ExecutionException e) {
            // Best effort: one failed fragment must not abort the others.
            log.error("replacePronoun:代词替换任务执行失败...", e);
        }
    }
    return result;
}
/**
 * Builds the pronoun-replacement task for the fragment at {@code index}, giving
 * it up to three immediately preceding fragments as context.
 *
 * @param splitList all record fragments, in record order
 * @param index position of the fragment to process
 * @return the task, or {@code null} when there is nothing to run: the list is
 *         {@code null}, the index is outside the list, the fragment is the first one
 *         (no preceding context), or its question or answer is empty
 */
@Override
public ReplacePronounTask buildReplacePronounTask(List<NoteRecordSplit> splitList, int index) {
    // index 0 has no preceding fragment to resolve pronouns against.
    if (splitList == null || index <= 0 || index >= splitList.size()) {
        return null;
    }
    NoteRecordSplit current = splitList.get(index);
    if (current == null) {
        return null;
    }
    if (StrUtil.isEmpty(current.getQuestion())) {
        log.info("replacePronoun:笔录片段:{}问题为空,跳过", current.getId());
        return null;
    }
    if (StrUtil.isEmpty(current.getAnswer())) {
        log.info("replacePronoun:笔录片段:{}答案为空,跳过", current.getId());
        return null;
    }
    final int contextSize = 3;
    int from = Math.max(0, index - contextSize);
    // Copy the window: subList is a live view of splitList, and the task runs
    // later on another thread.
    List<NoteRecordSplit> preSplit = new ArrayList<>(splitList.subList(from, index));
    return new ReplacePronounTask(chatClient, preSplit, current);
}
/**
 * Checks whether the text contains at least one of the given pronouns.
 * <p>
 * {@code null} or empty pronoun entries are ignored; an empty entry would
 * otherwise match every string because {@code s.contains("")} is always true.
 *
 * @param str text to inspect; {@code null} or empty never matches
 * @param pronouns pronouns to look for; {@code null} never matches
 * @return true if {@code str} contains any non-empty pronoun, false otherwise
 */
private boolean containsProcess(String str, List<String> pronouns) {
    if (str == null || str.isEmpty() || pronouns == null) {
        return false;
    }
    for (String pronoun : pronouns) {
        if (pronoun == null || pronoun.isEmpty()) {
            continue;
        }
        if (str.contains(pronoun)) {
            return true;
        }
    }
    return false;
}
}

@ -108,8 +108,8 @@ public class RecordSplitClassifyTask implements Callable<String> {
// 开始对笔录进行分类
Map<String, String> paramMap = new HashMap<>();
paramMap.put("typeContext", CollUtil.join(typeContextList, ";"));
paramMap.put("question", noteRecordSplit.getQuestion());
paramMap.put("answer", noteRecordSplit.getAnswer());
paramMap.put("question", noteRecordSplit.getQuestionIfProcessedAbsent());
paramMap.put("answer", noteRecordSplit.getAnswerIfProcessedAbsent());
Prompt prompt = new Prompt(new UserMessage(StrUtil.format(NEW_TEMPLATE, paramMap)));
stopWatch.start();

@ -0,0 +1,100 @@
package com.supervision.thread;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.supervision.police.domain.NoteRecordSplit;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.ChatResponse;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.OllamaChatClient;
import org.springframework.util.StopWatch;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
@Slf4j
public class ReplacePronounTask implements Callable<NoteRecordSplit> {
private final OllamaChatClient chatClient;
private final List<NoteRecordSplit> preNoteSplit;
private final NoteRecordSplit noteRecordSplit;
private final String PROMPT_TEMPLATE = """
{preText}
{text}
1. .......
2. {"Question": "办案民警问:.....","Answer":"...答:...."}
3.
4.
5. { "Error": "原因" }
""";
public ReplacePronounTask(OllamaChatClient chatClient, List<NoteRecordSplit> preNoteSplit, NoteRecordSplit noteRecordSplit) {
this.chatClient = chatClient;
this.preNoteSplit = preNoteSplit;
this.noteRecordSplit = noteRecordSplit;
}
@Override
public NoteRecordSplit call() throws Exception {
return chat4ReplacePronoun(preNoteSplit, noteRecordSplit);
}
private NoteRecordSplit chat4ReplacePronoun(List<NoteRecordSplit> preNoteSplit, NoteRecordSplit noteRecordSplit){
StopWatch stopWatch = new StopWatch();
if (CollUtil.isEmpty(preNoteSplit)) {
return noteRecordSplit;
}
// 首先拼接分类模板
StringBuilder sb = new StringBuilder();
Map<String, String> paramMap = new HashMap<>();
for (NoteRecordSplit recordSplit : preNoteSplit) {
sb.append(recordSplit.getQuestion()).append("\n").append(recordSplit.getAnswer()).append("\n");
}
sb.append(noteRecordSplit.getQuestion()).append("\n").append(noteRecordSplit.getAnswer());
paramMap.put("preText", sb.toString());
paramMap.put("text", StrUtil.join("\n", noteRecordSplit.getQuestion(), noteRecordSplit.getAnswer()));
Prompt prompt = new Prompt(new UserMessage(StrUtil.format(PROMPT_TEMPLATE, paramMap)));
log.info("chat4ReplacePronoun:开始替换代词 prompt:{}:", prompt.getContents());
stopWatch.start();
ChatResponse call = chatClient.call(prompt);
stopWatch.stop();
log.info("chat4ReplacePronoun:替换代词结束耗时:{}", stopWatch.getTotalTimeSeconds());
String content = call.getResult().getOutput().getContent();
log.info("chat4ReplacePronoun:替换后的结果:{}", content);
try {
JSONObject jsonObject = JSONUtil.parseObj(content);
String error = jsonObject.getStr("Error");
if (StrUtil.isNotEmpty(error)){
log.info("chat4ReplacePronoun:返回结果表示不能替换,错误原因:{}", error);
return noteRecordSplit;
}
String question = jsonObject.getStr("Question");
if (StrUtil.isNotEmpty(question)){
noteRecordSplit.setProcessedQuestion(question);
}
String answer = jsonObject.getStr("Answer");
if (StrUtil.isNotEmpty(answer)){
noteRecordSplit.setProcessedAnswer(answer);
}
return noteRecordSplit;
} catch (Exception e) {
log.error("chat4ReplacePronoun:返回结果解析失败", e);
}
return noteRecordSplit;
}
}

@ -0,0 +1,23 @@
package com.supervision.thread;
import cn.hutool.core.thread.ThreadUtil;
import jakarta.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.concurrent.ExecutorService;
@Slf4j
@Component
/**
 * Spring component that creates the executor used for pronoun-replacement
 * tasks and exposes it through a static field.
 * <p>
 * The field is assigned in {@link #init()}, so it is {@code null} until this
 * bean has been initialised.
 */
public class ReplacePronounTreadPool {

    /** Executor for pronoun-replacement tasks; set once by {@link #init()}. */
    public static ExecutorService executorService;

    /** Number of threads, read from configuration (defaults to 2). */
    @Value("${fu-hsi-config.thread-pool.pronoun.core:2}")
    private int core;

    /** Creates the executor after the configured thread count has been injected. */
    @PostConstruct
    public void init() {
        log.info("ReplacePronoun线程池初始化。线程数{}", core);
        executorService = createExecutor(core);
    }

    /**
     * Builds the fixed-size executor.
     *
     * @param threadCount number of threads
     */
    private static ExecutorService createExecutor(int threadCount) {
        // NOTE(review): 40960 is presumably the queue capacity and false the
        // "block when full" flag; confirm against the Hutool ThreadUtil docs.
        return ThreadUtil.newFixedExecutor(threadCount, 40960, "replacePronoun", false);
    }
}

@ -119,8 +119,8 @@ public class TripleExtractTask implements Callable<TripleInfo> {
paramMap.put("headEntityType", prompt.getStartEntityType());
paramMap.put("relation", prompt.getRelType());
paramMap.put("tailEntityType", prompt.getEndEntityType());
paramMap.put("question", noteRecordSplit.getQuestion());
paramMap.put("answer", noteRecordSplit.getAnswer());
paramMap.put("question", noteRecordSplit.getQuestionIfProcessedAbsent());
paramMap.put("answer", noteRecordSplit.getAnswerIfProcessedAbsent());
log.info("开始尝试提取三元组:{}-{}-{},mainActor:{}", prompt.getStartEntityType(), prompt.getRelType(), prompt.getEndEntityType(), mainActor == null ? "" : mainActor.getName());
if (mainActor != null && HEAD_ENTITY_TYPE_ACTOR.equals(prompt.getStartEntityType())) {
paramMap.put("requirement", "当前案件的行为人是" + mainActor.getName() + ",只尝试提取" + mainActor.getName() + "为头结点的三元组。");

@ -32,8 +32,11 @@ fu-hsi-config:
core: 1
type:
core: 1
pronoun:
core: 1
watermark:
template: "经侦取证智能分析模型 {userName}"
pronouns: 你,我,他,她,它,你们,我们,他们,她们,它们
logging:
level:

@ -427,4 +427,31 @@ public class FuHsiApplicationTests {
modelCaseService.migrateRecordKnowledgeBase();
}
@Autowired
private RecordSplitProcessService recordSplitProcessService;
/**
 * Manual smoke test: feeds four consecutive question/answer fragments to
 * {@code replacePronoun} and prints a marker when the call returns.
 * It makes no assertions on the result.
 */
@Test
public void replacePronounTest() throws InterruptedException {
    // Each row is {question, answer}, in record order.
    String[][] dialogue = {
            {"办案民警问: 你给耿彪出售的车辆你们是否签订相关的协议?",
                    "裴金禄答: 我给耿彪出售的车辆一部分签订了车辆报废车辆转让协议,一部没有签订。"},
            {"办案民警问: 耿彪和你签订协议的时候耿彪是否问你要这些车辆的手续? ",
                    "裴金禄答: 耿彪没有要,"},
            {"办案民警问: 既然耿彪都没有和你要你车辆的手续,他是否清楚这些车辆都是不合法渠道来的? ",
                    "裴金禄答: 我不知道耿彪是否清楚,我给他撒谎没有手续,耿彪也就默认再也没有和我要返回结果:"},
            {"办案民警问: 你们是在什么地方签订? ",
                    "裴金禄答: 我每次都是在他的车上签订的车辆转让协议,具体签订地方我不知道,我只知道都是在沙坡头区城里面"},
    };

    List<NoteRecordSplit> noteRecordSplits = new ArrayList<>();
    for (String[] questionAndAnswer : dialogue) {
        NoteRecordSplit split = new NoteRecordSplit();
        split.setQuestion(questionAndAnswer[0]);
        split.setAnswer(questionAndAnswer[1]);
        noteRecordSplits.add(split);
    }

    recordSplitProcessService.replacePronoun(noteRecordSplits);
    System.out.println("执行完成");
}
}

Loading…
Cancel
Save