修改表提取方式为程序快速提取
parent
8b4ec18483
commit
1fb2c75dc3
@ -0,0 +1,98 @@
|
||||
package com.supervision.pdfqaserver.service;
|
||||
|
||||
import org.commonmark.ext.gfm.tables.*;
|
||||
import org.commonmark.node.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class TableVisitor extends AbstractVisitor {
|
||||
private boolean inHeader = false;
|
||||
private boolean inBody = false;
|
||||
private List<String> currentRow = null;
|
||||
|
||||
private List<String> headers = new ArrayList<>();
|
||||
|
||||
private final List<List<String>> rows = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public void visit(CustomBlock customBlock) {
|
||||
if (customBlock instanceof TableBlock) {
|
||||
handleTableBlock((TableBlock) customBlock);
|
||||
} else {
|
||||
super.visit(customBlock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(CustomNode customNode) {
|
||||
if (customNode instanceof TableHead) {
|
||||
handleTableHead((TableHead) customNode);
|
||||
} else if (customNode instanceof TableBody) {
|
||||
handleTableBody((TableBody) customNode);
|
||||
} else if (customNode instanceof TableRow) {
|
||||
handleTableRow((TableRow) customNode);
|
||||
} else if (customNode instanceof TableCell) {
|
||||
handleTableCell((TableCell) customNode);
|
||||
} else {
|
||||
super.visit(customNode);
|
||||
}
|
||||
}
|
||||
|
||||
private void handleTableBlock(TableBlock tableBlock) {
|
||||
// 重置状态
|
||||
inHeader = false;
|
||||
inBody = false;
|
||||
visitChildren(tableBlock);
|
||||
}
|
||||
|
||||
private void handleTableHead(TableHead tableHead) {
|
||||
inHeader = true;
|
||||
visitChildren(tableHead);
|
||||
inHeader = false;
|
||||
}
|
||||
|
||||
private void handleTableBody(TableBody tableBody) {
|
||||
inBody = true;
|
||||
visitChildren(tableBody);
|
||||
inBody = false;
|
||||
}
|
||||
|
||||
private void handleTableRow(TableRow tableRow) {
|
||||
currentRow = new ArrayList<>();
|
||||
visitChildren(tableRow);
|
||||
|
||||
if (inHeader) {
|
||||
this.headers = currentRow;
|
||||
} else if (inBody) {
|
||||
this.rows.add(currentRow);
|
||||
}
|
||||
}
|
||||
|
||||
private void handleTableCell(TableCell tableCell) {
|
||||
if (currentRow != null) {
|
||||
currentRow.add(getTextContent(tableCell));
|
||||
}
|
||||
visitChildren(tableCell);
|
||||
}
|
||||
|
||||
private String getTextContent(Node node) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Node child = node.getFirstChild();
|
||||
while (child != null) {
|
||||
if (child instanceof Text) {
|
||||
sb.append(((Text) child).getLiteral());
|
||||
}
|
||||
child = child.getNext();
|
||||
}
|
||||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
public List<String> getTableHeaders() {
|
||||
return headers;
|
||||
}
|
||||
|
||||
public List<List<String>> getTableRows() {
|
||||
return rows;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue