You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.5 KiB
Python
72 lines
2.5 KiB
Python
import os
|
|
import subprocess
|
|
import re
|
|
|
|
# Root folder of the input images; each of its sub-folders is processed
# separately by the loop further down in this script.
base_image_dir = 'E:/Project/PaddleOCR/bilu_txt_dispose/Crop_img'

# Folder that receives the OCR text output (one .txt file per sub-folder).
output_dir = 'E:/Project/PaddleOCR/bilu_txt_dispose/OCR/OCR_txt'

# Create the output folder (and any missing parents) up front; an already
# existing folder is not an error.
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
# Matches ANSI escape sequences: either a two-character escape (ESC followed
# by one byte in '@'..'_' other than '[') or a complete CSI sequence
# (ESC '[', parameter bytes, intermediate bytes, one final byte in '@'..'~').
# Anchoring on the final byte matters: sequences that do not end in 'm'
# (erase line, cursor movement, ...) must not swallow ordinary text up to the
# next literal 'm'.
ansi_escape = re.compile(r'\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def remove_ansi_escape_sequences(text):
    """Return *text* with ANSI escape sequences and newlines removed.

    Args:
        text: The string to clean, e.g. captured console output.

    Returns:
        The input with every match of ``ansi_escape`` deleted and every
        ``'\\n'`` character deleted, so multi-line input is joined into a
        single line without any separator.
    """
    without_escapes = ansi_escape.sub('', text)
    return without_escapes.replace('\n', '')
|
|
|
|
|
|
# Environment for the OCR child processes. The captured output is decoded as
# UTF-8 below, so ask the child interpreter to write UTF-8 to its standard
# streams; a child writing to a pipe may otherwise use the locale's preferred
# encoding, which makes the decode fail.
child_env = dict(os.environ, PYTHONIOENCODING='utf-8')

# Walk every sub-folder of the image root; each one produces one result file.
for foldername in os.listdir(base_image_dir):
    folder_path = os.path.join(base_image_dir, foldername)

    # Entries in the image root that are not directories are ignored.
    if not os.path.isdir(folder_path):
        continue

    # Full paths of the regular files in this sub-folder. os.listdir returns
    # entries in arbitrary order, so sort them to keep the order of lines in
    # the result file reproducible between runs.
    image_files = [
        os.path.join(folder_path, entry)
        for entry in sorted(os.listdir(folder_path))
        if os.path.isfile(os.path.join(folder_path, entry))
    ]

    # One text file per sub-folder, named after the sub-folder.
    result_file_path = os.path.join(output_dir, f'{foldername}.txt')

    with open(result_file_path, 'w', encoding='utf-8') as result_file:
        for image_file in image_files:
            image_name = os.path.basename(image_file)

            # The script and model paths are relative, so they are resolved
            # against the working directory this script is started from.
            command = [
                'python', 'tools/infer/predict_system_1.py',
                '--use_gpu=False',
                '--cls_model_dir=./models/cls',
                '--rec_model_dir=./models/rec',
                '--det_model_dir=./models/det',
                f'--image_dir={image_file}',
            ]

            print(f"Processing {image_name} in {foldername}...")
            try:
                result = subprocess.run(
                    command,
                    capture_output=True,
                    text=True,
                    encoding='utf-8',
                    env=child_env,
                )
            except UnicodeDecodeError as e:
                # The child produced bytes that are not valid UTF-8; skip
                # this file and carry on with the next one.
                print(
                    f"UnicodeDecodeError occurred while processing {image_name} in {foldername}: {e}")
                continue

            # Everything the child printed on stdout becomes one line of the
            # result file (escape sequences and inner newlines removed).
            result_output = remove_ansi_escape_sequences(result.stdout.strip())
            if result_output:
                result_file.write(f"{result_output}\n")

            # NOTE(review): any stderr output is reported as an error, even
            # when the child exits successfully - confirm that the OCR script
            # does not log routine messages to stderr.
            if result.stderr:
                print(f"Error processing {image_name} in {foldername}: {result.stderr}")

    print(f"Results for folder {foldername} saved to {result_file_path}")

print("All folders processed.")
|