pdf文本提取

topo_dev
DESKTOP-DDTUS3E\yaxin 6 months ago
parent b52e04c83c
commit edf9f23e87

@ -168,6 +168,12 @@
<version>1.70</version> <version>1.70</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.28</version>
</dependency>
<dependency> <dependency>
<groupId>com.xuxueli</groupId> <groupId>com.xuxueli</groupId>
<artifactId>xxl-job-core</artifactId> <artifactId>xxl-job-core</artifactId>

@ -0,0 +1,23 @@
package com.supervision.utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import java.io.IOException;
import java.io.InputStream;
/**
 * Static helper for extracting plain text from PDF documents with Apache PDFBox.
 */
@Slf4j
public final class PDFReadUtil {

    /** Static-only utility; instances are never needed. */
    private PDFReadUtil() {
    }

    /**
     * Extracts the text of a PDF supplied as a stream.
     *
     * <p>This is a best-effort call: a {@code null} stream or an {@link IOException}
     * raised while loading or reading the document is logged and reported to the
     * caller as an empty string rather than as an exception.
     *
     * <p>The {@link PDDocument} opened here is closed before returning. This method
     * never calls {@code close()} on {@code inputStream} itself.
     * NOTE(review): presumably the caller still owns the stream and should close it;
     * confirm against the PDFBox version in use.
     *
     * @param inputStream stream positioned at the start of the PDF bytes; may be {@code null}
     * @return the extracted text, or {@code ""} when the stream is {@code null} or parsing fails
     */
    public static String pdf2text(InputStream inputStream) {
        // A null stream would otherwise escape as an unchecked exception, which is
        // inconsistent with the "log and return empty" contract used for I/O failures.
        if (inputStream == null) {
            log.warn("PDF输入流为空，跳过解析");
            return "";
        }
        try (PDDocument document = PDDocument.load(inputStream)) {
            return new PDFTextStripper().getText(document);
        } catch (IOException e) {
            log.error("解析PDF文件失败", e);
            return "";
        }
    }
}

@ -1,8 +1,10 @@
package com.supervision.demo; package com.supervision.demo;
import com.deepoove.poi.XWPFTemplate; import com.deepoove.poi.XWPFTemplate;
import com.supervision.minio.service.MinioService;
import com.supervision.police.dto.caseScore.CaseScoreDetailDTO; import com.supervision.police.dto.caseScore.CaseScoreDetailDTO;
import com.supervision.police.service.ModelService; import com.supervision.police.service.ModelService;
import com.supervision.utils.PDFReadUtil;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -15,8 +17,12 @@ import java.util.HashMap;
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) @SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
public class WordRenderTest { public class WordRenderTest {
@Autowired
private MinioService minioService;
@Autowired @Autowired
private ModelService modelService; private ModelService modelService;
public static void main(String[] args) throws FileNotFoundException { public static void main(String[] args) throws FileNotFoundException {
HashMap<String, Object> data = new HashMap<>(); HashMap<String, Object> data = new HashMap<>();
data.put("name", "张三"); data.put("name", "张三");
@ -54,4 +60,10 @@ public class WordRenderTest {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
/**
 * Fetches a stored object as a stream, runs it through {@link PDFReadUtil#pdf2text}
 * and logs the extracted text.
 *
 * NOTE(review): the object id is hard-coded, so this test presumably only works
 * against an environment where that object exists — confirm before relying on it.
 */
@Test
public void pdf2text() throws Exception {
    // try-with-resources closes the fetched stream once extraction is done;
    // the original never closed it. A null resource is skipped by the language.
    try (java.io.InputStream pdfStream = minioService.getObjectInputStream("1848552470327439362")) {
        String content = PDFReadUtil.pdf2text(pdfStream);
        log.info("content:{}", content);
    }
}
} }

Loading…
Cancel
Save