"""Batch driver that runs the OCR prediction script on every image folder.

For each sub-folder of ``base_image_dir`` the script launches
``tools/infer/predict_system_1.py`` once per file, strips terminal escape
sequences and newlines from the captured stdout, and appends the cleaned
text as one line to ``<output_dir>/<sub-folder name>.txt``.
"""
import os
import re
import subprocess
import sys

# Folder whose sub-folders contain the image files to run OCR on.
base_image_dir = 'E:/Project/PaddleOCR/bilu_txt_dispose/Crop_img'
# Folder that receives one result text file per image sub-folder.
output_dir = 'E:/Project/PaddleOCR/bilu_txt_dispose/OCR/OCR_txt'

# Matches one complete ANSI escape sequence: either a two-character escape
# (ESC followed by a byte in @-Z or \-_) or a CSI sequence (ESC [ parameters,
# intermediates, final byte).  Matching the real sequence grammar, instead of
# "everything up to the next 'm'", keeps a non-colour sequence such as
# ESC[2K from swallowing the recognised text that follows it.
ansi_escape = re.compile(r'\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def remove_ansi_escape_sequences(text: str) -> str:
    """Return *text* with ANSI escape sequences and newlines removed.

    Args:
        text: Raw text captured from the OCR subprocess.

    Returns:
        The text with every escape sequence matched by ``ansi_escape``
        deleted and every ``'\\n'`` removed, so the result is a single line.
    """
    text = ansi_escape.sub('', text)
    return text.replace('\n', '')


def _build_command(image_file: str) -> list:
    """Build the argument list that runs the prediction script on one file."""
    return [
        # Prefer the interpreter running this script so the child sees the
        # same environment; fall back to PATH lookup if it is unknown.
        sys.executable or 'python',
        'tools/infer/predict_system_1.py',
        '--use_gpu=False',
        '--cls_model_dir=./models/cls',
        '--rec_model_dir=./models/rec',
        '--det_model_dir=./models/det',
        f'--image_dir={image_file}',
    ]


def _list_files(folder_path: str) -> list:
    """Return the full paths of the regular files in *folder_path*, sorted."""
    candidates = (os.path.join(folder_path, name)
                  for name in sorted(os.listdir(folder_path)))
    return [path for path in candidates if os.path.isfile(path)]


def _process_folder(foldername: str, folder_path: str) -> None:
    """Run OCR on every file in one sub-folder and save the combined result.

    Each file contributes at most one line to ``<output_dir>/<foldername>.txt``.
    Files whose subprocess output cannot be decoded as UTF-8 are reported
    and skipped.
    """
    result_file_path = os.path.join(output_dir, f'{foldername}.txt')

    with open(result_file_path, 'w', encoding='utf-8') as result_file:
        for image_file in _list_files(folder_path):
            image_name = os.path.basename(image_file)
            print(f"Processing {image_name} in {foldername}...")

            # Only the subprocess call decodes bytes, so it is the only
            # statement that can raise UnicodeDecodeError.
            try:
                result = subprocess.run(_build_command(image_file),
                                        capture_output=True, text=True,
                                        encoding='utf-8')
            except UnicodeDecodeError as e:
                print(
                    f"UnicodeDecodeError occurred while processing {image_name} in {foldername}: {e}")
                continue  # Skip this file and continue with the next one

            result_output = remove_ansi_escape_sequences(result.stdout.strip())
            if result_output:
                result_file.write(f"{result_output}\n")

            # Surface a failed child process explicitly; stderr alone is
            # ambiguous because it is printed whenever it is non-empty.
            if result.returncode != 0:
                print(f"OCR process exited with code {result.returncode} "
                      f"for {image_name} in {foldername}")
            if result.stderr:
                print(f"Error processing {image_name} in {foldername}: {result.stderr}")

    print(f"Results for folder {foldername} saved to {result_file_path}")


def main() -> None:
    """Create the output folder and process every sub-folder of the input."""
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so folders are processed in a reproducible order.
    for foldername in sorted(os.listdir(base_image_dir)):
        folder_path = os.path.join(base_image_dir, foldername)
        if not os.path.isdir(folder_path):
            continue
        _process_folder(foldername, folder_path)

    print("All folders processed.")


if __name__ == '__main__':
    main()