From 029a8a636dddf02ef293e100ba049ed186c577bc Mon Sep 17 00:00:00 2001
From: "DESKTOP-DDTUS3E\\yaxin"
Date: Tue, 10 Sep 2024 14:33:36 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A4=A7=E6=A8=A1=E5=9E=8B=E6=89=B9=E9=87=8F?=
 =?UTF-8?q?=E6=8F=90=E5=8F=96=E6=A0=87=E9=A2=98=E6=8E=A5=E5=8F=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: line breaks and generic type arguments were restored and the
Java sources revised; the blob ids on the "index" lines still refer to the
originally submitted content.

 .../supervision/police/dto/OcrExtractDto.java |  16 +++
 .../police/service/OcrExtractService.java     |  19 ++++
 .../service/impl/OcrExtractServiceImpl.java   | 100 ++++++++++++++++++
 .../com/supervision/demo/OcrExtractTest.java  |  93 ++++++++++++++++
 4 files changed, 228 insertions(+)
 create mode 100644 src/main/java/com/supervision/police/dto/OcrExtractDto.java
 create mode 100644 src/main/java/com/supervision/police/service/OcrExtractService.java
 create mode 100644 src/main/java/com/supervision/police/service/impl/OcrExtractServiceImpl.java
 create mode 100644 src/test/java/com/supervision/demo/OcrExtractTest.java

diff --git a/src/main/java/com/supervision/police/dto/OcrExtractDto.java b/src/main/java/com/supervision/police/dto/OcrExtractDto.java
new file mode 100644
index 0000000..8319b37
--- /dev/null
+++ b/src/main/java/com/supervision/police/dto/OcrExtractDto.java
@@ -0,0 +1,16 @@
+package com.supervision.police.dto;
+
+import lombok.Data;
+
+/**
+ * Carries one OCR text fragment into title extraction and the extracted title back out.
+ */
+@Data
+public class OcrExtractDto {
+    /** Caller-supplied identifier; title extraction does not modify it. */
+    private String id;
+    /** OCR text the title is extracted from. */
+    private String text;
+    /** Extracted title. */
+    private String title;
+}
diff --git a/src/main/java/com/supervision/police/service/OcrExtractService.java b/src/main/java/com/supervision/police/service/OcrExtractService.java
new file mode 100644
index 0000000..3273981
--- /dev/null
+++ b/src/main/java/com/supervision/police/service/OcrExtractService.java
@@ -0,0 +1,19 @@
+package com.supervision.police.service;
+
+import com.supervision.police.dto.OcrExtractDto;
+
+import java.util.List;
+
+/**
+ * Extracts titles from OCR text fragments.
+ */
+public interface OcrExtractService {
+
+    /**
+     * Extracts a title for each entry of the given list.
+     *
+     * @param ocrExtractDtoList entries whose {@code text} should be analysed
+     * @return the processed entries, in input order, with {@code title} populated where one was found
+     */
+    List<OcrExtractDto> extractTitle(List<OcrExtractDto> ocrExtractDtoList);
+}
diff --git a/src/main/java/com/supervision/police/service/impl/OcrExtractServiceImpl.java b/src/main/java/com/supervision/police/service/impl/OcrExtractServiceImpl.java
new file mode 100644
index 0000000..0fae9b4
--- /dev/null
+++ b/src/main/java/com/supervision/police/service/impl/OcrExtractServiceImpl.java
@@ -0,0 +1,100 @@
+package com.supervision.police.service.impl;
+
+import cn.hutool.core.util.StrUtil;
+import com.supervision.police.dto.OcrExtractDto;
+import com.supervision.police.service.OcrExtractService;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.springframework.ai.chat.ChatResponse;
+import org.springframework.ai.chat.messages.UserMessage;
+import org.springframework.ai.chat.prompt.Prompt;
+import org.springframework.ai.ollama.OllamaChatClient;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * {@link OcrExtractService} implementation that asks an Ollama chat model for the title of each text.
+ */
+@Slf4j
+@Service
+public class OcrExtractServiceImpl implements OcrExtractService {
+
+    /** Instructions sent to the model; it is asked to answer with a {"title": "..."} JSON object. */
+    private static final String REQUIREMENT = "1.尝试提取图中明显的标题。2.始终输出json格式:{\"title\":\"*****\"},没有明显标题则返回:{\"title\":\"\"}。3.只提取title属性,不需要任何其他输出。";
+
+    /** Prompt layout; the {requirement} and {text} placeholders are filled by StrUtil.format. */
+    private static final String TEMPLATE = "要求如下:\n{requirement}\n文本内容如下:\n{text}";
+
+    @Autowired
+    private OllamaChatClient ollamaChatClient;
+
+    /**
+     * Extracts a title for every entry, one model call per entry.
+     *
+     * <p>Each entry is modified in place: its text is blanked to shrink the response, and its title is
+     * set from the model's answer. An entry that ends up without a title inherits the title of the
+     * entry processed just before it.
+     *
+     * @param ocrExtractDtoList entries to process; {@code null} is treated as empty
+     * @return the same entry objects, in input order
+     */
+    @Override
+    public List<OcrExtractDto> extractTitle(List<OcrExtractDto> ocrExtractDtoList) {
+        List<OcrExtractDto> result = new ArrayList<>();
+        if (ocrExtractDtoList == null) {
+            return result;
+        }
+        for (OcrExtractDto ocrExtractDto : ocrExtractDtoList) {
+            Map<String, String> paramMap = new HashMap<>();
+            paramMap.put("text", ocrExtractDto.getText());
+            paramMap.put("requirement", REQUIREMENT);
+            Prompt prompt = new Prompt(new UserMessage(StrUtil.format(TEMPLATE, paramMap)));
+            ChatResponse call = ollamaChatClient.call(prompt);
+            String content = call.getResult().getOutput().getContent();
+            log.info("content: {}", content);
+            // Blank the text to reduce the amount of data returned.
+            ocrExtractDto.setText("");
+            String title = parseTitle(content);
+            if (title != null) {
+                ocrExtractDto.setTitle(title);
+            }
+            // No title extracted: fall back to the title of the previously processed entry.
+            if (StringUtils.isEmpty(ocrExtractDto.getTitle()) && !result.isEmpty()) {
+                ocrExtractDto.setTitle(result.get(result.size() - 1).getTitle());
+            }
+            result.add(ocrExtractDto);
+        }
+        return result;
+    }
+
+    /**
+     * Reads the {@code title} property from the model's reply.
+     *
+     * <p>Only the span from the first '{' to the last '}' is parsed, so a reply that wraps the JSON
+     * in a Markdown fence or extra prose is still understood.
+     *
+     * @param content raw reply text, may be {@code null}
+     * @return the title, or {@code null} when the reply holds no parsable string {@code title}
+     */
+    private String parseTitle(String content) {
+        if (content == null) {
+            return null;
+        }
+        int start = content.indexOf('{');
+        int end = content.lastIndexOf('}');
+        String json = (start >= 0 && end > start) ? content.substring(start, end + 1) : content;
+        try {
+            return new JSONObject(json).getString("title");
+        } catch (JSONException e) {
+            log.error("解析标题失败", e);
+            return null;
+        }
+    }
+}
diff --git a/src/test/java/com/supervision/demo/OcrExtractTest.java b/src/test/java/com/supervision/demo/OcrExtractTest.java
new file mode 100644
index 0000000..e5c3fcc
--- /dev/null
+++ b/src/test/java/com/supervision/demo/OcrExtractTest.java
@@ -0,0 +1,93 @@
+package com.supervision.demo;
+
+import com.supervision.police.dto.OcrExtractDto;
+import com.supervision.police.service.OcrExtractService;
+import lombok.extern.slf4j.Slf4j;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Exercises {@link OcrExtractService#extractTitle} in the Spring context with two OCR text fragments.
+ */
+@Slf4j
+@SpringBootTest
+public class OcrExtractTest {
+    @Autowired
+    private OcrExtractService ocrExtractService;
+
+    // NOTE(review): the fixture text below looks like real case material with personal identifiers;
+    // consider replacing it with synthetic data before merging.
+    @Test
+    public void testExtractTitle() {
+        List<OcrExtractDto> ocrExtractDtoList = new ArrayList<>();
+        OcrExtractDto ocrExtractDto = new OcrExtractDto();
+        ocrExtractDto.setId("1833322433007398913");
+        ocrExtractDto.setText("关于李社辉等人涉嫌合同诈骗罪的\n" +
+                "报案材料\n" +
+                "报案人:许锋,男,8。年\n" +
+                "一月\n" +
+                "/旦,汉族,\n" +
+                "号:-640/2119001012218\n" +
+                "电话:1837571888。\n" +
+                "被报案人:\n" +
+                "李社辉,男,年月1/日,汉族,住\n" +
+                "址:西安市间良7x公园路7株号,身份证号:610114195505110513\n" +
+                "电话:1810537777。\n" +
+                "宁夏博金特立体泊车设备有限公司,住所地:青铜峡市嘉宝\n" +
+                "工业园区7-8号。\n" +
+                "法定代表人:李社辉,该公司总经理。\n" +
+                "报案请求:\n" +
+                "因被报案人李社辉等人涉嫌合同诈骗罪,报案人现向贵局报\n" +
+                "案,请求立案侦查。\n" +
+                "事实与理由:\n" +
+                "一、背景事实\n" +
+                "2017年4月左右,罗静给报案人打电话,称其时任宁夏博金\n" +
+                "特立体泊车设备有限公司副总,公司项目的发拨、资金的发放均\n" +
+                "由他决定,将会有好的项目给报案人介绍。\n" +
+                "2017年5月10日,罗静给报案人打电话说有一个很好的立\n" +
+                "体停车场的项目欲承包给报案人,让报案人准备二十万元保证金\n" +
+                "交到公司,并强调必须是现金。因此,当天下午报案人从银行卡\n" +
+                "里取出二十万元现金交至被报案人李社辉处,对方给报案人出具");
+        ocrExtractDtoList.add(ocrExtractDto);
+        OcrExtractDto ocrExtractDto2 = new OcrExtractDto();
+        ocrExtractDto2.setId("1833322459494428674");
+        ocrExtractDto2.setText("了《收据》一张,并与报案人签订了《协议书》一份,约定报案\n" +
+                "人同意向宁夏博金特立体泊车设备有限公司以现金方式进行担\n" +
+                "保,交纳质量及施工保证金,即人民币二十万元整。该保证金交\n" +
+                "至甲方账户后,宁夏博金特立体泊车设备有限公司须确保凯尔福\n" +
+                "邸项目由报案人完成。\n" +
+                "二、李社辉涉嫌合同诈骗罪的犯罪事实非常明确、证据确凿,\n" +
+                "应予认定\n" +
+                "在报案人与被报案人宁夏博金特立体泊车设备有限公司签订\n" +
+                "《协议书》后,报案人积极起草项目施工合同,期待与被报案人\n" +
+                "宁夏博金特立体泊车设备有限公司签订合同,完成合作,但对方\n" +
+                "总是以各种理由推脱。之后,报案人经多方打听,才知道被报案\n" +
+                "人宁夏博金特根本没有关于凯尔福邸的项目,报案人才知道上当\n" +
+                "受骗。报案人就开始向被报案人李社辉索要报案人交的二十万元\n" +
+                "保证金,被报案人李社辉等人以各种理由推,并且一直欺骗报\n" +
+                "案人说要给报案人介绍其他工程项目。但是这已经过去了两年,\n" +
+                "对方并没有给报案人退还保证金,也没有给报案人介绍其他项目。\n" +
+                "直到现在,被报案人李社辉等人不接听电话,也不出现与报案人\n" +
+                "处理,报案人深感受骗!\n" +
+                "李社辉等人骗取报案人钱财是合谋诈骗,是有预谋团伙作案,\n" +
+                "通过签订假协议的方式诈骗他人财物,涉案数额特别巨大,其行\n" +
+                "为严重扰乱了正常的市场经济秩序。\n" +
+                "三、李社辉等人行为已构成犯罪,符合立案标准。\n" +
+                "(一)立案标准明确\n" +
+                "《最高人民检察院公安部关于公安机关管辖的刑事案件立案\n" +
+                "2");
+        ocrExtractDtoList.add(ocrExtractDto2);
+        List<OcrExtractDto> result = ocrExtractService.extractTitle(ocrExtractDtoList);
+        log.info("result: {}", result);
+        // The service returns one entry per input, in input order, with the text blanked.
+        Assertions.assertEquals(2, result.size());
+        Assertions.assertEquals("1833322433007398913", result.get(0).getId());
+        Assertions.assertEquals("1833322459494428674", result.get(1).getId());
+        Assertions.assertEquals("", result.get(0).getText());
+    }
+}