# PaddleOcr_v4/predict_system.py

import os
import re
import subprocess
import sys

# Root folder of the input images; its sub-folders are processed one by one.
base_image_dir = "E:/Project/PaddleOCR/bilu_txt_dispose/Crop_img"
# Folder that receives the OCR result text files.
output_dir = "E:/Project/PaddleOCR/bilu_txt_dispose/OCR/OCR_txt"

# Make sure the output folder exists (no error if it is already there).
os.makedirs(output_dir, exist_ok=True)

# Regular expression that matches ANSI escape sequences.
# Two forms are recognised:
#   * two-character sequences: ESC followed by one byte in the range @ .. _
#     (excluding '[', which introduces a CSI sequence);
#   * CSI sequences: ESC '[' + parameter bytes (0-?) + intermediate bytes
#     (space-/) + one final byte (@-~).
# Matching on the sequence structure (instead of "everything up to the next
# 'm'") keeps real text intact when a sequence ends in a letter other than
# 'm', e.g. the erase-line sequence ESC[2K.
ansi_escape = re.compile(r'\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def remove_ansi_escape_sequences(text: str) -> str:
    """Return *text* without ANSI escape sequences and without newlines.

    Args:
        text: Raw text, typically the captured stdout of a child process.

    Returns:
        The text with every ANSI escape sequence removed and every ``'\\n'``
        character deleted, so multi-line input collapses onto a single line.
    """
    # Strip terminal colour / cursor control sequences.
    text = ansi_escape.sub('', text)
    # Drop line breaks so the result fits on one line.
    text = text.replace('\n', '')
    return text


# Environment for the child interpreter: force its stdout/stderr to UTF-8 so
# that the bytes it writes to the pipe agree with the encoding used below to
# decode them (a Python child otherwise uses the locale encoding for pipes).
child_env = {**os.environ, 'PYTHONIOENCODING': 'utf-8'}

# Walk every sub-folder of the image root; each sub-folder gets its own
# result file. Listings are sorted because os.listdir() returns entries in
# arbitrary order, which would make the line order of the results
# unpredictable between runs.
for foldername in sorted(os.listdir(base_image_dir)):
    folder_path = os.path.join(base_image_dir, foldername)

    # Only directories are processed; loose files in the root are skipped.
    if not os.path.isdir(folder_path):
        continue

    # Full paths of the regular files directly inside this sub-folder
    # (no filtering by file extension is performed).
    image_files = [
        path
        for path in (os.path.join(folder_path, f) for f in sorted(os.listdir(folder_path)))
        if os.path.isfile(path)
    ]

    # One result file per sub-folder, named after the sub-folder.
    result_file_path = os.path.join(output_dir, f'{foldername}.txt')

    with open(result_file_path, 'w', encoding='utf-8') as result_file:
        for image_file in image_files:
            image_name = os.path.basename(image_file)

            # Run the inference script with the interpreter that is running
            # this script, rather than whatever "python" resolves to on PATH.
            command = [
                sys.executable, 'tools/infer/predict_system_1.py',
                '--use_gpu=False',
                '--cls_model_dir=./models/cls',
                '--rec_model_dir=./models/rec',
                '--det_model_dir=./models/det',
                f'--image_dir={image_file}',
            ]

            print(f"Processing {image_name} in {foldername}...")
            try:
                result = subprocess.run(command, capture_output=True, text=True,
                                        encoding='utf-8', env=child_env)
            except UnicodeDecodeError as e:
                # The child produced bytes that are not valid UTF-8; skip
                # this file and carry on with the next one.
                print(
                    f"UnicodeDecodeError occurred while processing {image_name} in {foldername}: {e}")
                continue

            # Write the cleaned stdout as a single line of the result file.
            result_output = remove_ansi_escape_sequences(result.stdout.strip())
            if result_output:
                result_file.write(f"{result_output}\n")

            if result.returncode != 0:
                # The child failed; report the exit code together with
                # whatever it wrote to stderr.
                print(f"Error processing {image_name} in {foldername} "
                      f"(exit code {result.returncode}): {result.stderr}")
            elif result.stderr:
                # Exit code was 0 but something was written to stderr.
                print(f"Error processing {image_name} in {foldername}: {result.stderr}")

    print(f"Results for folder {foldername} saved to {result_file_path}")

print("All folders processed.")