# PaddleOcr_v4/OCR_LLM_attribute/0_Ocr_txt_scripting.py

import os
import subprocess
import json


# Lower-cased file extensions that are treated as images.
_IMAGE_EXTENSIONS = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.bmp'})


def _run_ocr(image_path):
    """Run the OCR script on one image and return the text to store for it.

    The script path and the model directories in the command are relative,
    so they are resolved against the current working directory of this
    process.

    Args:
        image_path: Path of the image passed to the script as ``--image_dir``.

    Returns:
        The script's stripped stdout when it exits with code 0, otherwise a
        failure message containing the exit code. If the process cannot be
        started, a message describing the error is returned instead.
    """
    # Local import keeps this block self-contained; the file's import header
    # does not bring ``sys`` into scope.
    import sys

    command = [
        # Prefer the interpreter running this script over whatever "python"
        # resolves to on PATH; fall back when the executable is unknown.
        sys.executable or 'python', 'tools/infer/predict_system_1.py',
        '--use_gpu=False',
        '--cls_model_dir=./models/cls',
        '--rec_model_dir=./models/rec',
        '--det_model_dir=./models/det',
        f'--image_dir={image_path}',
    ]

    try:
        # errors='replace' keeps stray non-UTF-8 bytes in stdout/stderr from
        # raising and discarding the whole result for this image.
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        return f"处理图片时发生错误: {str(e)}"

    if completed.returncode == 0:
        return completed.stdout.strip()

    # Relay the child's diagnostics so a failure can be investigated; the
    # stored message itself stays unchanged.
    diagnostics = completed.stderr.strip()
    if diagnostics:
        print(diagnostics, file=sys.stderr)
    return f"识别失败，返回代码: {completed.returncode}"


def process_images_in_folder(image_folder, output_json_file):
    """Run OCR on every image under a folder and save the results as JSON.

    The folder is walked recursively. Files whose extension (compared
    case-insensitively) is one of .jpg, .jpeg, .png, .gif or .bmp are
    processed one at a time by a separate OCR process; all other files are
    skipped. A failure on one image is recorded in that image's entry and
    does not stop the run.

    Args:
        image_folder: Root folder to scan for images.
        output_json_file: Path of the JSON file to write. Missing parent
            directories are created.

    Returns:
        None. The JSON file holds a list with one object per image, with the
        keys "图片名称" (file name), "文件路径" (full path) and "识别结果"
        (OCR output or failure message).
    """
    results = []

    for root, _dirs, files in os.walk(image_folder):
        for image_file in files:
            extension = os.path.splitext(image_file)[-1].lower()
            if extension not in _IMAGE_EXTENSIONS:
                continue

            temp_image_file = os.path.join(root, image_file)
            results.append({
                "图片名称": image_file,
                "文件路径": temp_image_file,
                "识别结果": _run_ocr(temp_image_file),
            })

    # Create the destination folder up front so finished OCR work is not
    # lost to a FileNotFoundError when the results are written.
    output_dir = os.path.dirname(output_json_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_json_file, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Chinese keys and text readable.
        json.dump(results, f, ensure_ascii=False, indent=4)
    print(f"识别结果已保存到 {output_json_file}")


if __name__ == "__main__":
    # Destination JSON file that receives the collected OCR results.
    output_json_file = r"E:\Project\PaddleOcr_v4\OCR_LLM_attribute\output_2\MaiMaiHeTong_results.json"

    # Root folder scanned for images. Alternative input folder kept for
    # reference: r"E:\Project\PaddleOcr_v4\contract"
    image_folder = r"E:\Project\PaddleOcr_v4\OCR_LLM_attribute\output_2\MaiMaiHeTong"

    # Process every image under the folder and write the results file.
    process_images_in_folder(
        image_folder=image_folder,
        output_json_file=output_json_file,
    )
# Commit note from the repository history view: "添加ocr接口日志打印功能"
# (add log printing to the OCR interface), 10 months ago.
# The remainder of this span was a backtick-wrapped, indentation-stripped
# duplicate of the script above (extraction residue) and carried no extra content.