From 0e0cdc4a50ecd1172f6dc54e10292629e42b0397 Mon Sep 17 00:00:00 2001 From: xueqingkun Date: Sat, 8 Feb 2025 13:53:23 +0800 Subject: [PATCH] =?UTF-8?q?1.=20=E6=8C=87=E4=BB=A3=E6=B6=88=E9=99=A4?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=BC=80=E5=8F=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../police/domain/NoteRecordSplit.java | 18 ++++ .../service/NoteRecordSplitService.java | 6 ++ .../service/RecordSplitProcessService.java | 20 +++- .../impl/NoteRecordSplitServiceImpl.java | 22 ++++ .../impl/RecordSplitProcessServiceImpl.java | 90 +++++++++++++++- .../thread/RecordSplitClassifyTask.java | 4 +- .../thread/ReplacePronounTask.java | 100 ++++++++++++++++++ .../thread/ReplacePronounTreadPool.java | 23 ++++ .../supervision/thread/TripleExtractTask.java | 4 +- src/main/resources/application.yml | 3 + .../demo/FuHsiApplicationTests.java | 27 +++++ 11 files changed, 309 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/supervision/thread/ReplacePronounTask.java create mode 100644 src/main/java/com/supervision/thread/ReplacePronounTreadPool.java diff --git a/src/main/java/com/supervision/police/domain/NoteRecordSplit.java b/src/main/java/com/supervision/police/domain/NoteRecordSplit.java index 34b0ac6..4aaf7b1 100644 --- a/src/main/java/com/supervision/police/domain/NoteRecordSplit.java +++ b/src/main/java/com/supervision/police/domain/NoteRecordSplit.java @@ -1,5 +1,6 @@ package com.supervision.police.domain; +import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.annotation.FieldFill; import com.baomidou.mybatisplus.annotation.TableField; import com.baomidou.mybatisplus.annotation.TableId; @@ -53,6 +54,16 @@ public class NoteRecordSplit implements Serializable { */ private String answer; + /** + * 处理后问题 + */ + private String processedQuestion; + + /** + * 处理后回答 + */ + private String processedAnswer; + /** * 笔录类型(总结) */ @@ -90,4 +101,11 @@ public class NoteRecordSplit implements Serializable { 
@TableField(exist = false) private static final long serialVersionUID = 1L; + + public String getAnswerIfProcessedAbsent() { + return StrUtil.isNotEmpty(this.processedAnswer) ? this.processedAnswer : this.answer; + } + public String getQuestionIfProcessedAbsent() { + return StrUtil.isNotEmpty(this.processedQuestion) ? this.processedQuestion : this.question; + } } diff --git a/src/main/java/com/supervision/police/service/NoteRecordSplitService.java b/src/main/java/com/supervision/police/service/NoteRecordSplitService.java index efe3652..ccb0676 100644 --- a/src/main/java/com/supervision/police/service/NoteRecordSplitService.java +++ b/src/main/java/com/supervision/police/service/NoteRecordSplitService.java @@ -35,4 +35,10 @@ public interface NoteRecordSplitService extends IService { List queryRecordSplitList(String recordId); + + /** + * 批量更新笔录片段 + * @param splitList 笔录片段列表 + */ + void batchUpdatePronoun(List splitList); } diff --git a/src/main/java/com/supervision/police/service/RecordSplitProcessService.java b/src/main/java/com/supervision/police/service/RecordSplitProcessService.java index 7603063..e7b8322 100644 --- a/src/main/java/com/supervision/police/service/RecordSplitProcessService.java +++ b/src/main/java/com/supervision/police/service/RecordSplitProcessService.java @@ -1,7 +1,7 @@ package com.supervision.police.service; - import com.supervision.police.domain.ModelRecordType; import com.supervision.police.domain.NoteRecordSplit; +import com.supervision.thread.ReplacePronounTask; import java.util.List; @@ -18,4 +18,22 @@ public interface RecordSplitProcessService { */ boolean recordProcessTaskStatusCheck(String caseId, String recordId, Integer splitSize); + + /** + * 替换笔录中的代词 + * @param splitList 笔录片段列表 + * @return 替换后的笔录片段列表 note:如果没有被替换,则不会出现在返回结果中 + * @throws InterruptedException + */ + List replacePronoun(List splitList) throws InterruptedException; + + + /** + * 替换笔录中的代词 + * + * @param splitList 笔录片段列表 + * @param index 笔录片段索引 + */ + ReplacePronounTask 
buildReplacePronounTask(List<NoteRecordSplit> splitList, int index);
+
+
 }
diff --git a/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java b/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java
index 4fe0f18..f5b2ebc 100644
--- a/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java
+++ b/src/main/java/com/supervision/police/service/impl/NoteRecordSplitServiceImpl.java
@@ -109,6 +109,15 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl noteRecordSplits = recordSplitProcessService.replacePronoun(allNoteRecordSplits);
+            noteRecordSplitService.batchUpdatePronoun(noteRecordSplits);
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so code further up the stack can still observe the interruption.
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(e);
+        }
         // 对笔录进行分类,并对笔录进行提取三元组
         List allTypeList = modelRecordTypeService.lambdaQuery().list();
         recordSplitProcessService.process(allTypeList, allNoteRecordSplits);
@@ -376,6 +385,31 @@ public class NoteRecordSplitServiceImpl extends ServiceImpl splitList) {
+        // Tolerate a missing list so callers need not special-case "nothing was processed".
+        if (splitList == null) {
+            return;
+        }
+        for (NoteRecordSplit noteRecordSplit : splitList) {
+            // Without an id there is no row to match in the update below.
+            if (noteRecordSplit == null || StrUtil.isEmpty(noteRecordSplit.getId())) {
+                continue;
+            }
+            // Skip splits that carry no processed text: the raw question/answer say nothing about
+            // whether there is anything to persist, and the update would only write two nulls.
+            if (StrUtil.isEmpty(noteRecordSplit.getProcessedQuestion())
+                    && StrUtil.isEmpty(noteRecordSplit.getProcessedAnswer())) {
+                continue;
+            }
+            super.lambdaUpdate()
+                    .set(NoteRecordSplit::getProcessedQuestion, noteRecordSplit.getProcessedQuestion())
+                    .set(NoteRecordSplit::getProcessedAnswer, noteRecordSplit.getProcessedAnswer())
+                    .eq(NoteRecordSplit::getId, noteRecordSplit.getId())
+                    .update();
+        }
+    }
+
     private void setRecordDetailSerialNumber(List noteRecordDetailDTOList) {
 
         List> pairs = new ArrayList<>();
diff --git a/src/main/java/com/supervision/police/service/impl/RecordSplitProcessServiceImpl.java b/src/main/java/com/supervision/police/service/impl/RecordSplitProcessServiceImpl.java
index 4053b19..944488e 100644
--- a/src/main/java/com/supervision/police/service/impl/RecordSplitProcessServiceImpl.java
+++ b/src/main/java/com/supervision/police/service/impl/RecordSplitProcessServiceImpl.java
@@
-1,18 +1,24 @@
 package com.supervision.police.service.impl;
-
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.StrUtil;
 import com.supervision.police.domain.CaseTaskRecord;
 import com.supervision.police.domain.ModelRecordType;
 import com.supervision.police.domain.NoteRecordSplit;
 import com.supervision.police.service.CaseTaskRecordService;
 import com.supervision.police.service.RecordSplitProcessService;
 import com.supervision.police.service.RecordSplitClassifyService;
+import com.supervision.thread.ReplacePronounTask;
+import com.supervision.thread.ReplacePronounTreadPool;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.ollama.OllamaChatClient;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 
 import java.time.LocalDateTime;
-import java.util.List;
-import java.util.Optional;
+import java.util.*;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
 
 @Slf4j
 @Service
@@ -23,6 +29,10 @@ public class RecordSplitProcessServiceImpl implements RecordSplitProcessService
 
     private final CaseTaskRecordService caseTaskRecordService;
 
+    private final OllamaChatClient chatClient;
+
+    @Value("${fu-hsi-config.pronouns}")
+    private List<String> pronouns;
     @Override
     public void process(List allTypeList, List splitList) {
         // 通过异步的形式提交分类
@@ -101,4 +111,101 @@ public class RecordSplitProcessServiceImpl implements RecordSplitProcessService
         return true;
     }
+
+    /**
+     * Replaces the pronouns in the given record splits by submitting one chat-model task per split
+     * and waiting for all of them.
+     * <p>
+     * A split is submitted only when its question and answer are both non-empty and at least one of
+     * them contains a configured pronoun. The first split is never submitted because it has no
+     * preceding context.
+     *
+     * @param splitList record splits, in transcript order
+     * @return the splits that ended up with a processed question or answer; empty when none did
+     * @throws InterruptedException if the calling thread is interrupted while waiting for the tasks
+     */
+    @Override
+    public List<NoteRecordSplit> replacePronoun(List<NoteRecordSplit> splitList) throws InterruptedException {
+        if (CollUtil.isEmpty(splitList)) {
+            return new ArrayList<>();
+        }
+
+        List<ReplacePronounTask> tasks = new ArrayList<>();
+        for (int i = 0; i < splitList.size(); i++) {
+            NoteRecordSplit noteRecordSplit = splitList.get(i);
+            String question = noteRecordSplit.getQuestion();
+            String answer = noteRecordSplit.getAnswer();
+            if (StrUtil.isEmpty(question) || StrUtil.isEmpty(answer)) {
+                continue;
+            }
+            // Neither half mentions a configured pronoun, so there is nothing to resolve.
+            if (!containsProcess(question, pronouns) && !containsProcess(answer, pronouns)) {
+                continue;
+            }
+            ReplacePronounTask task = buildReplacePronounTask(splitList, i);
+            if (task != null) {
+                tasks.add(task);
+            }
+        }
+
+        List<Future<NoteRecordSplit>> futures = ReplacePronounTreadPool.executorService.invokeAll(tasks);
+        List<NoteRecordSplit> result = new ArrayList<>();
+        for (Future<NoteRecordSplit> future : futures) {
+            try {
+                NoteRecordSplit processed = future.get();
+                // A task returns its split even when nothing was replaced, so keep only the splits
+                // that actually carry processed text.
+                if (processed != null
+                        && (StrUtil.isNotEmpty(processed.getProcessedQuestion())
+                        || StrUtil.isNotEmpty(processed.getProcessedAnswer()))) {
+                    result.add(processed);
+                }
+            } catch (ExecutionException e) {
+                // One failed task must not discard the results of the others.
+                log.error("replacePronoun:代词替换任务执行失败...",e);
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Builds the pronoun-replacement task for the split at {@code index}, with up to three preceding
+     * splits as context.
+     *
+     * @param splitList record splits, in transcript order
+     * @param index     position in {@code splitList} of the split to process
+     * @return the task, or {@code null} when the split is the first one or has an empty question or answer
+     */
+    @Override
+    public ReplacePronounTask buildReplacePronounTask(List<NoteRecordSplit> splitList, int index) {
+        // The first split has no preceding dialogue to resolve pronouns against.
+        if (index == 0){
+            return null;
+        }
+        NoteRecordSplit noteRecordSplit = splitList.get(index);
+        if (StrUtil.isEmpty(noteRecordSplit.getQuestion())){
+            log.info("replacePronoun:笔录片段:{}问题为空,跳过",noteRecordSplit.getId());
+            return null;
+        }
+        if (StrUtil.isEmpty(noteRecordSplit.getAnswer())){
+            log.info("replacePronoun:笔录片段:{}答案为空,跳过",noteRecordSplit.getId());
+            return null;
+        }
+        // Snapshot the context window so the task, which runs on a pool thread, does not hold a
+        // subList view that breaks if splitList is structurally modified later.
+        int from = Math.max(0, index - 3);
+        List<NoteRecordSplit> preSplit = new ArrayList<>(splitList.subList(from, index));
+        return new ReplacePronounTask(chatClient, preSplit, noteRecordSplit);
+    }
+
+    /**
+     * Tells whether the text contains any of the given pronouns.
+     *
+     * @param str      text to scan
+     * @param pronouns pronouns to look for
+     * @return {@code true} if at least one pronoun occurs in {@code str}
+     */
+    private boolean containsProcess(String str, List<String> pronouns) {
+        return pronouns.stream().anyMatch(str::contains);
+    }
 
 }
diff --git a/src/main/java/com/supervision/thread/RecordSplitClassifyTask.java b/src/main/java/com/supervision/thread/RecordSplitClassifyTask.java
index 74cc8f1..7884b75 100644
--- a/src/main/java/com/supervision/thread/RecordSplitClassifyTask.java
+++ b/src/main/java/com/supervision/thread/RecordSplitClassifyTask.java
@@ -108,8 +108,8 @@ public class RecordSplitClassifyTask implements Callable {
         // 开始对笔录进行分类
         Map paramMap = new HashMap<>();
         paramMap.put("typeContext", CollUtil.join(typeContextList, ";"));
-        paramMap.put("question", noteRecordSplit.getQuestion());
-        paramMap.put("answer", noteRecordSplit.getAnswer());
+        paramMap.put("question", noteRecordSplit.getQuestionIfProcessedAbsent());
+        paramMap.put("answer", noteRecordSplit.getAnswerIfProcessedAbsent());
         Prompt prompt = new Prompt(new UserMessage(StrUtil.format(NEW_TEMPLATE, paramMap)));
 
         stopWatch.start();
diff --git a/src/main/java/com/supervision/thread/ReplacePronounTask.java b/src/main/java/com/supervision/thread/ReplacePronounTask.java
new file mode 100644
index 0000000..f57b283
--- /dev/null
+++ b/src/main/java/com/supervision/thread/ReplacePronounTask.java
@@ -0,0 +1,137 @@
+package com.supervision.thread;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.supervision.police.domain.NoteRecordSplit;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.chat.ChatResponse;
+import org.springframework.ai.chat.messages.UserMessage;
+import org.springframework.ai.chat.prompt.Prompt;
+import org.springframework.ai.ollama.OllamaChatClient;
+import org.springframework.util.StopWatch;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+
+/**
+ * Callable that asks the chat model to replace the pronouns in one record split with the names
+ * they refer to, using the preceding splits as context.
+ */
+@Slf4j
+public class ReplacePronounTask implements Callable<NoteRecordSplit> {
+
+    private final OllamaChatClient chatClient;
+
+    private final List<NoteRecordSplit> preNoteSplit;
+
+    private final NoteRecordSplit noteRecordSplit;
+
+    // The template never varies between tasks, so it is a class constant rather than an instance field.
+    private static final String PROMPT_TEMPLATE = """
+            完整问答(包含当前对话的前三句):
+            {preText}
+
+            当前问答:
+            {text}
+
+            任务:
+            1. 参考完整问答,将当前问答中的指代词【“他”、“他们”、“她”、“她们” 、“它”、“你”、“你们”、“我”、“我们”、.......】替换成对应人名。
+            2. 输出格式为{"Question": "办案民警问:.....","Answer":"...答:...."}
+            3. 只替换指代词,其他文字不修改。
+            4. 当前问答中可能不只有一个指代词,所有指代词都需要替换。问题中的指代词也要替换。
+            5. 尽量替换,如果不能,请输出: { "Error": "原因" }
+            """;
+
+    /**
+     * @param chatClient      chat client used to call the model
+     * @param preNoteSplit    splits that precede the current one, used as context
+     * @param noteRecordSplit split whose pronouns are to be replaced
+     */
+    public ReplacePronounTask(OllamaChatClient chatClient, List<NoteRecordSplit> preNoteSplit, NoteRecordSplit noteRecordSplit) {
+        this.chatClient = chatClient;
+        this.preNoteSplit = preNoteSplit;
+        this.noteRecordSplit = noteRecordSplit;
+    }
+
+    /**
+     * Runs the replacement.
+     *
+     * @return the split passed to the constructor, with processedQuestion / processedAnswer set when the model supplied them
+     */
+    @Override
+    public NoteRecordSplit call() throws Exception {
+        return chat4ReplacePronoun(preNoteSplit, noteRecordSplit);
+    }
+
+    /**
+     * Sends the context plus the current split to the model and copies the rewritten question and
+     * answer from its JSON reply onto the split.
+     *
+     * @return {@code noteRecordSplit}; left untouched when there is no context, the model reports an
+     *         error, or the reply cannot be parsed
+     */
+    private NoteRecordSplit chat4ReplacePronoun(List<NoteRecordSplit> preNoteSplit, NoteRecordSplit noteRecordSplit) {
+        if (CollUtil.isEmpty(preNoteSplit)) {
+            return noteRecordSplit;
+        }
+
+        // "preText" is the context window followed by the current split; "text" is the current split alone.
+        StringBuilder sb = new StringBuilder();
+        for (NoteRecordSplit recordSplit : preNoteSplit) {
+            sb.append(recordSplit.getQuestion()).append("\n").append(recordSplit.getAnswer()).append("\n");
+        }
+        sb.append(noteRecordSplit.getQuestion()).append("\n").append(noteRecordSplit.getAnswer());
+        Map<String, String> paramMap = new HashMap<>();
+        paramMap.put("preText", sb.toString());
+        paramMap.put("text", StrUtil.join("\n", noteRecordSplit.getQuestion(), noteRecordSplit.getAnswer()));
+        Prompt prompt = new Prompt(new UserMessage(StrUtil.format(PROMPT_TEMPLATE, paramMap)));
+
+        log.info("chat4ReplacePronoun:开始替换代词 prompt:{}:", prompt.getContents());
+        StopWatch stopWatch = new StopWatch();
+        stopWatch.start();
+        ChatResponse call = chatClient.call(prompt);
+        stopWatch.stop();
+        log.info("chat4ReplacePronoun:替换代词结束耗时:{}", stopWatch.getTotalTimeSeconds());
+
+        String content = call.getResult().getOutput().getContent();
+        log.info("chat4ReplacePronoun:替换后的结果:{}", content);
+        try {
+            JSONObject jsonObject = JSONUtil.parseObj(extractJsonObject(content));
+            String error = jsonObject.getStr("Error");
+            if (StrUtil.isNotEmpty(error)){
+                log.info("chat4ReplacePronoun:返回结果表示不能替换,错误原因:{}", error);
+                return noteRecordSplit;
+            }
+            String question = jsonObject.getStr("Question");
+            if (StrUtil.isNotEmpty(question)){
+                noteRecordSplit.setProcessedQuestion(question);
+            }
+            String answer = jsonObject.getStr("Answer");
+            if (StrUtil.isNotEmpty(answer)){
+                noteRecordSplit.setProcessedAnswer(answer);
+            }
+        } catch (Exception e) {
+            // Best effort: an unparsable reply leaves the split unprocessed instead of failing the task.
+            log.error("chat4ReplacePronoun:返回结果解析失败", e);
+        }
+        return noteRecordSplit;
+    }
+
+    /**
+     * Cuts the reply down to its outermost {@code {...}} span so that surrounding text such as a
+     * Markdown code fence does not break JSON parsing.
+     *
+     * @return the span from the first '{' to the last '}', or the input unchanged when there is none
+     */
+    private static String extractJsonObject(String content) {
+        if (content == null) {
+            return null;
+        }
+        int start = content.indexOf('{');
+        int end = content.lastIndexOf('}');
+        return start >= 0 && end > start ? content.substring(start, end + 1) : content;
+    }
+}
diff --git a/src/main/java/com/supervision/thread/ReplacePronounTreadPool.java b/src/main/java/com/supervision/thread/ReplacePronounTreadPool.java
new file mode 100644
index 0000000..e07780b
--- /dev/null
+++ b/src/main/java/com/supervision/thread/ReplacePronounTreadPool.java
@@ -0,0 +1,37 @@
+package com.supervision.thread;
+
+import cn.hutool.core.thread.ThreadUtil;
+import jakarta.annotation.PostConstruct;
+import jakarta.annotation.PreDestroy;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import java.util.concurrent.ExecutorService;
+
+/**
+ * Spring component that creates the executor on which {@link ReplacePronounTask}s run.
+ */
+@Slf4j
+@Component
+public class ReplacePronounTreadPool {
+
+    @Value("${fu-hsi-config.thread-pool.pronoun.core:2}")
+    private int core;
+
+    // Static so callers can reach the pool without injecting this bean; null until init() has run.
+    public static ExecutorService executorService;
+
+    @PostConstruct
+    public void init() {
+        log.info("ReplacePronoun线程池初始化。线程数:{}", core);
+        executorService = ThreadUtil.newFixedExecutor(core, 40960, "replacePronoun", false);
+    }
+
+    /** Stops the pool when the Spring context closes so its worker threads do not outlive the application. */
+    @PreDestroy
+    public void destroy() {
+        if (executorService != null) {
+            executorService.shutdown();
+        }
+    }
+}
diff --git
a/src/main/java/com/supervision/thread/TripleExtractTask.java b/src/main/java/com/supervision/thread/TripleExtractTask.java index 86bceb8..1be81ff 100644 --- a/src/main/java/com/supervision/thread/TripleExtractTask.java +++ b/src/main/java/com/supervision/thread/TripleExtractTask.java @@ -119,8 +119,8 @@ public class TripleExtractTask implements Callable { paramMap.put("headEntityType", prompt.getStartEntityType()); paramMap.put("relation", prompt.getRelType()); paramMap.put("tailEntityType", prompt.getEndEntityType()); - paramMap.put("question", noteRecordSplit.getQuestion()); - paramMap.put("answer", noteRecordSplit.getAnswer()); + paramMap.put("question", noteRecordSplit.getQuestionIfProcessedAbsent()); + paramMap.put("answer", noteRecordSplit.getAnswerIfProcessedAbsent()); log.info("开始尝试提取三元组:{}-{}-{},mainActor:{}", prompt.getStartEntityType(), prompt.getRelType(), prompt.getEndEntityType(), mainActor == null ? "" : mainActor.getName()); if (mainActor != null && HEAD_ENTITY_TYPE_ACTOR.equals(prompt.getStartEntityType())) { paramMap.put("requirement", "当前案件的行为人是" + mainActor.getName() + ",只尝试提取" + mainActor.getName() + "为头结点的三元组。"); diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 0039917..9cf7cbd 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -32,8 +32,11 @@ fu-hsi-config: core: 1 type: core: 1 + pronoun: + core: 1 watermark: template: "经侦取证智能分析模型 {userName}" + pronouns: 你,我,他,她,它,你们,我们,他们,她们,它们 logging: level: diff --git a/src/test/java/com/supervision/demo/FuHsiApplicationTests.java b/src/test/java/com/supervision/demo/FuHsiApplicationTests.java index fef779c..331e884 100644 --- a/src/test/java/com/supervision/demo/FuHsiApplicationTests.java +++ b/src/test/java/com/supervision/demo/FuHsiApplicationTests.java @@ -427,4 +427,31 @@ public class FuHsiApplicationTests { modelCaseService.migrateRecordKnowledgeBase(); } + @Autowired + private RecordSplitProcessService 
recordSplitProcessService;
+    @Test
+    public void replacePronounTest() throws InterruptedException {
+        List<NoteRecordSplit> noteRecordSplits = new ArrayList<>();
+        NoteRecordSplit noteRecordSplit = new NoteRecordSplit();
+        noteRecordSplit.setQuestion("办案民警问: 你给耿彪出售的车辆你们是否签订相关的协议?");
+        noteRecordSplit.setAnswer("裴金禄答: 我给耿彪出售的车辆一部分签订了车辆报废车辆转让协议,一部没有签订。");
+        noteRecordSplits.add(noteRecordSplit);
+        NoteRecordSplit noteRecordSplit1 = new NoteRecordSplit();
+        noteRecordSplit1.setQuestion("办案民警问: 耿彪和你签订协议的时候耿彪是否问你要这些车辆的手续? ");
+        noteRecordSplit1.setAnswer("裴金禄答: 耿彪没有要,");
+        noteRecordSplits.add(noteRecordSplit1);
+        NoteRecordSplit noteRecordSplit2 = new NoteRecordSplit();
+        noteRecordSplit2.setQuestion("办案民警问: 既然耿彪都没有和你要你车辆的手续,他是否清楚这些车辆都是不合法渠道来的? ");
+        noteRecordSplit2.setAnswer("裴金禄答: 我不知道耿彪是否清楚,我给他撒谎没有手续,耿彪也就默认再也没有和我要返回结果:");
+        noteRecordSplits.add(noteRecordSplit2);
+
+        NoteRecordSplit noteRecordSplit3 = new NoteRecordSplit();
+        noteRecordSplit3.setQuestion("办案民警问: 你们是在什么地方签订? ");
+        noteRecordSplit3.setAnswer("裴金禄答: 我每次都是在他的车上签订的车辆转让协议,具体签订地方我不知道,我只知道都是在沙坡头区城里面");
+        noteRecordSplits.add(noteRecordSplit3);
+        // The first split has no preceding context, so no task is built for it and it must not be returned.
+        List<NoteRecordSplit> noteRecordSplits1 = recordSplitProcessService.replacePronoun(noteRecordSplits);
+        org.junit.jupiter.api.Assertions.assertFalse(noteRecordSplits1.contains(noteRecordSplit));
+    }
+
 }