You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

68 lines
2.8 KiB
Python

import json
import os
import subprocess
import sys
def process_images_in_folder(image_folder, output_json_file):
    """Run the OCR script on every image under a folder and save the results as JSON.

    The folder is walked recursively. For each file whose extension is one of
    .jpg, .jpeg, .png, .gif or .bmp (case-insensitive), the script
    ``tools/infer/predict_system_1.py`` is launched in a child process and its
    standard output is recorded as the recognition result. The script path and
    the ``./models/...`` directories are relative, so they are resolved against
    the current working directory of the caller.

    Args:
        image_folder: Root directory to search for images.
        output_json_file: Path of the JSON file to write. Missing parent
            directories are created.

    Returns:
        None. The results are written to ``output_json_file`` as a JSON list
        with one object per image (keys: "图片名称", "文件路径", "识别结果"),
        and a confirmation message is printed.

    Raises:
        OSError: If the output directory cannot be created or the JSON file
            cannot be written. Failures while processing a single image are
            not raised; they are recorded in that image's "识别结果" entry.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    # Create the destination directory before doing any work, so a missing
    # folder cannot discard the results after every image has been processed.
    output_dir = os.path.dirname(os.path.abspath(output_json_file))
    os.makedirs(output_dir, exist_ok=True)

    results = []  # one dict per processed image

    for root, dirs, files in os.walk(image_folder):
        # Sorting in place makes os.walk descend in a reproducible order, so
        # the output file is stable across runs and platforms.
        dirs.sort()
        for image_file in sorted(files):
            extension = os.path.splitext(image_file)[-1].lower()
            if extension not in image_extensions:
                continue

            image_path = os.path.join(root, image_file)

            # sys.executable launches the same interpreter (and therefore the
            # same installed packages) that is running this script.
            command = [
                sys.executable, 'tools/infer/predict_system_1.py',
                '--use_gpu=False',
                '--cls_model_dir=./models/cls',
                '--rec_model_dir=./models/rec',
                '--det_model_dir=./models/det',
                f'--image_dir={image_path}',
            ]

            try:
                # errors='replace' keeps the rest of the output when the child
                # emits bytes that are not valid UTF-8.
                completed = subprocess.run(
                    command,
                    capture_output=True,
                    text=True,
                    encoding='utf-8',
                    errors='replace',
                )
            except (OSError, ValueError, subprocess.SubprocessError) as e:
                # The child could not be started or run; record the error and
                # continue with the next image.
                ocr_result = f"处理图片时发生错误: {str(e)}"
            else:
                if completed.returncode == 0:
                    ocr_result = completed.stdout.strip()
                else:
                    ocr_result = f"识别失败,返回代码: {completed.returncode}"

            results.append({
                "图片名称": image_file,
                "文件路径": image_path,
                "识别结果": ocr_result,
            })

    # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 output file.
    with open(output_json_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    print(f"识别结果已保存到 {output_json_file}")
if __name__ == "__main__":
    import argparse

    # Both arguments are optional and default to the original hard-coded
    # locations, so running the script with no arguments behaves as before.
    # Alternative input folder used previously: E:\Project\PaddleOcr_v4\contract
    parser = argparse.ArgumentParser(
        description="Run OCR on every image in a folder and save the results as JSON."
    )
    parser.add_argument(
        "image_folder",
        nargs="?",
        default=r"E:\Project\PaddleOcr_v4\OCR_LLM_attribute\output_2\MaiMaiHeTong",
        help="folder containing the images to process (searched recursively)",
    )
    parser.add_argument(
        "output_json_file",
        nargs="?",
        default=r"E:\Project\PaddleOcr_v4\OCR_LLM_attribute\output_2\MaiMaiHeTong_results.json",
        help="path of the JSON file that receives the results",
    )
    args = parser.parse_args()

    # Folder containing the images to process.
    image_folder = args.image_folder
    # Path of the output JSON file.
    output_json_file = args.output_json_file

    # Process the images and save the results.
    process_images_in_folder(image_folder, output_json_file)