From edf9f23e8751c6f177eb6b51f095a99587b28120 Mon Sep 17 00:00:00 2001 From: "DESKTOP-DDTUS3E\\yaxin" Date: Tue, 22 Oct 2024 10:40:58 +0800 Subject: [PATCH] =?UTF-8?q?pdf=E6=96=87=E6=9C=AC=E6=8F=90=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 6 +++++ .../com/supervision/utils/PDFReadUtil.java | 23 +++++++++++++++++++ .../com/supervision/utils/WordReadUtil.java | 2 +- .../com/supervision/demo/WordRenderTest.java | 14 ++++++++++- 4 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 src/main/java/com/supervision/utils/PDFReadUtil.java diff --git a/pom.xml b/pom.xml index 165e5d4..191ce8f 100644 --- a/pom.xml +++ b/pom.xml @@ -168,6 +168,12 @@ 1.70 + + org.apache.pdfbox + pdfbox + 2.0.28 + + com.xuxueli xxl-job-core diff --git a/src/main/java/com/supervision/utils/PDFReadUtil.java b/src/main/java/com/supervision/utils/PDFReadUtil.java new file mode 100644 index 0000000..e619777 --- /dev/null +++ b/src/main/java/com/supervision/utils/PDFReadUtil.java @@ -0,0 +1,23 @@ +package com.supervision.utils; + +import lombok.extern.slf4j.Slf4j; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; + +import java.io.IOException; +import java.io.InputStream; + +@Slf4j +public class PDFReadUtil { + + public static String pdf2text(InputStream inputStream) { + String text = ""; + try (PDDocument document = PDDocument.load(inputStream)) { + PDFTextStripper stripper = new PDFTextStripper(); + text = stripper.getText(document); + } catch (IOException e) { + log.error("解析PDF文件失败!", e); + } + return text; + } +} diff --git a/src/main/java/com/supervision/utils/WordReadUtil.java b/src/main/java/com/supervision/utils/WordReadUtil.java index ede7468..15a5aea 100644 --- a/src/main/java/com/supervision/utils/WordReadUtil.java +++ b/src/main/java/com/supervision/utils/WordReadUtil.java @@ -58,7 +58,7 @@ public class WordReadUtil { public static String readWord(InputStream inputStream) { StringBuilder stringBuilder = new StringBuilder(); - try (inputStream) { + try (inputStream) { // 创建 XWPFDocument 对象 XWPFDocument document = new XWPFDocument(inputStream); // 获取所有段落 diff --git a/src/test/java/com/supervision/demo/WordRenderTest.java b/src/test/java/com/supervision/demo/WordRenderTest.java index 5499961..8da54ae 100644 --- a/src/test/java/com/supervision/demo/WordRenderTest.java +++ b/src/test/java/com/supervision/demo/WordRenderTest.java @@ -1,8 +1,10 @@ package com.supervision.demo; import com.deepoove.poi.XWPFTemplate; +import com.supervision.minio.service.MinioService; import com.supervision.police.dto.caseScore.CaseScoreDetailDTO; import com.supervision.police.service.ModelService; +import com.supervision.utils.PDFReadUtil; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -16,7 +18,11 @@ import java.util.HashMap; public class WordRenderTest { @Autowired - private ModelService modelService; + private MinioService minioService; + + @Autowired + private ModelService modelService; + public static void main(String[] args) throws FileNotFoundException { HashMap data = new HashMap<>(); data.put("name", "张三"); @@ -54,4 +60,10 @@ public class WordRenderTest { throw new RuntimeException(e); } } + + @Test + public void pdf2text() {// 创建文件对象 + String content = PDFReadUtil.pdf2text(minioService.getObjectInputStream("1848552470327439362")); + log.info("content:{}", content); + } }