# -*- coding: utf-8 -*-
# @Time : 2019/8/23 21:54
# @Author : zhoujun
import pathlib
import os
import cv2
import numpy as np
import scipy.io as sio
from tqdm.auto import tqdm
from base import BaseDataSet
from utils import order_points_clockwise, get_datalist, load, expand_polygon


class ICDAR2015Dataset(BaseDataSet):
    def __init__(
        self,
        data_path: str,
        img_mode,
        pre_processes,
        filter_keys,
        ignore_tags,
        transform=None,
        **kwargs,
    ):
        super().__init__(
            data_path, img_mode, pre_processes, filter_keys, ignore_tags, transform
        )

    def load_data(self, data_path: str) -> list:
        data_list = get_datalist(data_path)
        t_data_list = []
        for img_path, label_path in data_list:
            data = self._get_annotation(label_path)
            if len(data["text_polys"]) > 0:
                item = {"img_path": img_path, "img_name": pathlib.Path(img_path).stem}
                item.update(data)
                t_data_list.append(item)
            else:
                print("there is no suitable bbox in {}".format(label_path))
        return t_data_list

    def _get_annotation(self, label_path: str) -> dict:
        boxes = []
        texts = []
        ignores = []
        with open(label_path, encoding="utf-8", mode="r") as f:
            for line in f.readlines():
                # strip a possible UTF-8 BOM before splitting the comma-separated fields
                params = line.strip().strip("\ufeff").strip("\xef\xbb\xbf").split(",")
                try:
                    # the first 8 values are the 4 corner points of the text polygon
                    box = order_points_clockwise(
                        np.array(list(map(float, params[:8]))).reshape(-1, 2)
                    )
                    if cv2.contourArea(box) > 0:
                        boxes.append(box)
                        label = params[8]
                        texts.append(label)
                        ignores.append(label in self.ignore_tags)
                except Exception:
                    print("load label failed on {}".format(label_path))
        data = {
            "text_polys": np.array(boxes),
            "texts": texts,
            "ignore_tags": ignores,
        }
        return data
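
# Illustrative note (not in the original file): each ICDAR 2015 label file has one
# line per text instance in the form "x1,y1,x2,y2,x3,y3,x4,y4,transcription", e.g.
#   377,117,463,117,465,130,378,130,Genaxis Theatre
# With ignore_tags=["*", "###"], a line whose transcription is "###" keeps its
# polygon but is flagged as ignored, so _get_annotation returns roughly:
#   {"text_polys": <np.ndarray of shape (N, 4, 2)>,
#    "texts": ["Genaxis Theatre", ...],
#    "ignore_tags": [False, ...]}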


class DetDataset(BaseDataSet):
    def __init__(
        self,
        data_path: str,
        img_mode,
        pre_processes,
        filter_keys,
        ignore_tags,
        transform=None,
        **kwargs,
    ):
        self.load_char_annotation = kwargs["load_char_annotation"]
        self.expand_one_char = kwargs["expand_one_char"]
        super().__init__(
            data_path, img_mode, pre_processes, filter_keys, ignore_tags, transform
        )

    def load_data(self, data_path: str) -> list:
        """
        Read the text-line polygons, the per-character polygons, and their ground
        truths from json annotation files.
        :param data_path: list of json annotation file paths
        :return: list of sample dicts
        """
        data_list = []
        for path in data_path:
            content = load(path)
            for gt in tqdm(content["data_list"], desc="read file {}".format(path)):
                img_path = os.path.join(content["data_root"], gt["img_name"])
                polygons = []
                texts = []
                illegibility_list = []
                language_list = []
                for annotation in gt["annotations"]:
                    if len(annotation["polygon"]) == 0 or len(annotation["text"]) == 0:
                        continue
                    # optionally expand the polygon for multi-character text
                    if len(annotation["text"]) > 1 and self.expand_one_char:
                        annotation["polygon"] = expand_polygon(annotation["polygon"])
                    polygons.append(annotation["polygon"])
                    texts.append(annotation["text"])
                    illegibility_list.append(annotation["illegibility"])
                    language_list.append(annotation["language"])
                    if self.load_char_annotation:
                        for char_annotation in annotation["chars"]:
                            if (
                                len(char_annotation["polygon"]) == 0
                                or len(char_annotation["char"]) == 0
                            ):
                                continue
                            polygons.append(char_annotation["polygon"])
                            texts.append(char_annotation["char"])
                            illegibility_list.append(char_annotation["illegibility"])
                            language_list.append(char_annotation["language"])
                data_list.append(
                    {
                        "img_path": img_path,
                        "img_name": gt["img_name"],
                        "text_polys": np.array(polygons),
                        "texts": texts,
                        "ignore_tags": illegibility_list,
                    }
                )
        return data_list
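
# Sketch of the json layout that DetDataset.load_data expects, inferred from the
# keys it reads above (the concrete values here are made-up placeholders):
# {
#   "data_root": "/path/to/images",
#   "data_list": [
#     {
#       "img_name": "img_1.jpg",
#       "annotations": [
#         {
#           "polygon": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
#           "text": "hello",
#           "illegibility": false,
#           "language": "Latin",
#           "chars": [
#             {"polygon": [...], "char": "h", "illegibility": false, "language": "Latin"}
#           ]
#         }
#       ]
#     }
#   ]
# }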


class SynthTextDataset(BaseDataSet):
    def __init__(
        self,
        data_path: str,
        img_mode,
        pre_processes,
        filter_keys,
        ignore_tags,
        transform=None,
        **kwargs,
    ):
        self.transform = transform
        self.dataRoot = pathlib.Path(data_path)
        if not self.dataRoot.exists():
            raise FileNotFoundError("Dataset folder does not exist.")
        self.targetFilePath = self.dataRoot / "gt.mat"
        if not self.targetFilePath.exists():
            raise FileNotFoundError("Target file gt.mat does not exist.")
        targets = {}
        sio.loadmat(
            self.targetFilePath,
            targets,
            squeeze_me=True,
            struct_as_record=False,
            variable_names=["imnames", "wordBB", "txt"],
        )
        self.imageNames = targets["imnames"]
        self.wordBBoxes = targets["wordBB"]
        self.transcripts = targets["txt"]
        super().__init__(
            data_path, img_mode, pre_processes, filter_keys, ignore_tags, transform
        )
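
    # gt.mat layout (as consumed above): "imnames" holds relative image paths,
    # "wordBB" holds word boxes as 2 x 4 x num_words arrays of xy corner
    # coordinates (2 x 4 when an image has a single word), and "txt" holds
    # whitespace-separated transcriptions per image.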

    def load_data(self, data_path: str) -> list:
        t_data_list = []
        for imageName, wordBBoxes, texts in zip(
            self.imageNames, self.wordBBoxes, self.transcripts
        ):
            item = {}
            # normalize single-word images from (2, 4) to (2, 4, 1)
            wordBBoxes = (
                np.expand_dims(wordBBoxes, axis=2)
                if (wordBBoxes.ndim == 2)
                else wordBBoxes
            )
            _, _, numOfWords = wordBBoxes.shape
            text_polys = wordBBoxes.reshape(
                [8, numOfWords], order="F"
            ).T  # num_words * 8
            text_polys = text_polys.reshape(numOfWords, 4, 2)  # num_of_words * 4 * 2
            transcripts = [word for line in texts for word in line.split()]
            # skip samples whose word count does not match the number of boxes
            if numOfWords != len(transcripts):
                continue
            item["img_path"] = str(self.dataRoot / imageName)
            item["img_name"] = (self.dataRoot / imageName).stem
            item["text_polys"] = text_polys
            item["texts"] = transcripts
            item["ignore_tags"] = [x in self.ignore_tags for x in transcripts]
            t_data_list.append(item)
        return t_data_list
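

if __name__ == "__main__":
    # Minimal usage sketch (not from the original file). The paths and options
    # below are illustrative assumptions; BaseDataSet is assumed to behave like a
    # torch Dataset (__getitem__/__len__) driven by the pre_processes pipeline.
    dataset = ICDAR2015Dataset(
        data_path=["./datasets/icdar2015/train.txt"],  # assumed list file of "img_path\tlabel_path" pairs
        img_mode="RGB",
        pre_processes=[],          # assumption: no augmentations for this smoke test
        filter_keys=[],            # keep every key in each sample dict
        ignore_tags=["*", "###"],  # transcriptions treated as "ignore" regions
    )
    print("loaded {} samples".format(len(dataset)))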