You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
68 lines
2.8 KiB
Python
68 lines
2.8 KiB
Python
6 months ago
|
import os
|
||
|
import subprocess
|
||
|
import json
|
||
|
|
||
|
|
||
|
def _run_ocr_on_image(image_path):
    """Run the OCR script on a single image and return its result text.

    Launches ``tools/infer/predict_system_1.py`` (path relative to the
    current working directory) in a child process with the CPU-only model
    settings used by this script.

    Args:
        image_path: Path of the image passed to the script via ``--image_dir``.

    Returns:
        The child's stripped standard output when it exits with code 0,
        otherwise a failure message containing the exit code.

    Raises:
        OSError: If the interpreter cannot be started.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import sys

    command = [
        # Use the interpreter that is running this script so the child sees
        # the same environment; fall back to PATH lookup if it is unknown.
        sys.executable or 'python', 'tools/infer/predict_system_1.py',
        '--use_gpu=False',
        '--cls_model_dir=./models/cls',
        '--rec_model_dir=./models/rec',
        '--det_model_dir=./models/det',
        f'--image_dir={image_path}',
    ]
    # errors='replace' keeps whatever text could be decoded instead of
    # failing the whole image when the child emits non-UTF-8 bytes.
    result = subprocess.run(
        command,
        capture_output=True,
        text=True,
        encoding='utf-8',
        errors='replace',
    )
    if result.returncode == 0:
        return result.stdout.strip()
    return f"识别失败,返回代码: {result.returncode}"


def process_images_in_folder(image_folder, output_json_file):
    """Run OCR on every image under a folder and save the results as JSON.

    Walks ``image_folder`` recursively, runs the OCR script once per file
    whose extension is .jpg, .jpeg, .png, .gif or .bmp (case-insensitive),
    and writes one record per image to ``output_json_file`` as a UTF-8 JSON
    list. A failure on one image is recorded in that image's record and does
    not stop the batch.

    Args:
        image_folder: Root directory to search for images.
        output_json_file: Path of the JSON file to write. Missing parent
            directories are created.

    Returns:
        None. The results are written to ``output_json_file`` and a
        confirmation line is printed.
    """
    image_extensions = frozenset(('.jpg', '.jpeg', '.png', '.gif', '.bmp'))
    results = []  # one dict per processed image

    for root, _dirs, files in os.walk(image_folder):
        for image_file in files:
            # Skip anything that is not an image file.
            if os.path.splitext(image_file)[-1].lower() not in image_extensions:
                continue

            temp_image_file = os.path.join(root, image_file)
            try:
                ocr_result = _run_ocr_on_image(temp_image_file)
            except Exception as e:
                # Deliberately broad: this is a best-effort batch, so any
                # per-image failure is stored in the record and the loop
                # continues with the next image.
                ocr_result = f"处理图片时发生错误: {e}"

            results.append({
                "图片名称": image_file,
                "文件路径": temp_image_file,
                "识别结果": ocr_result,
            })

    # Create the destination directory first so a long batch is not lost to
    # a missing folder at the final write.
    output_dir = os.path.dirname(output_json_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # ensure_ascii=False keeps the Chinese text readable in the output file.
    with open(output_json_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    print(f"识别结果已保存到 {output_json_file}")
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Where the JSON report is written.
    output_json_file = r"E:\Project\PaddleOcr_v4\OCR_LLM_attribute\output_2\MaiMaiHeTong_results.json"

    # Folder holding the images to process.
    # Alternative input kept for reference:
    # image_folder = r"E:\Project\PaddleOcr_v4\contract"
    image_folder = r"E:\Project\PaddleOcr_v4\OCR_LLM_attribute\output_2\MaiMaiHeTong"

    # Process every image under the folder and save the results.
    process_images_in_folder(
        image_folder=image_folder,
        output_json_file=output_json_file,
    )
|
||
|
|