Skip to content

pixano.data.importers.dota_importer

DOTAImporter(name, description, input_dirs, splits)

Bases: Importer

Importer class for the DOTA dataset

Attributes:

Name Type Description
info DatasetInfo

Dataset information

input_dirs dict[str, Path]

Dataset input directories

Parameters:

Name Type Description Default
name str

Dataset name

required
description str

Dataset description

required
input_dirs dict[str, Path]

Dataset input directories

required
splits list[str]

Dataset splits

required
Source code in pixano/data/importers/dota_importer.py
def __init__(
    self,
    name: str,
    description: str,
    input_dirs: dict[str, Path],
    splits: list[str],
):
    """Set up an importer for the DOTA dataset

    Args:
        name (str): Dataset name
        description (str): Dataset description
        input_dirs (dict[str, Path]): Dataset input directories
        splits (list[str]): Dataset splits
    """

    # DOTA category names, listed in id order (first entry is id 1, last is id 18)
    dota_categories = [
        "plane",
        "ship",
        "storage tank",
        "baseball diamond",
        "tennis court",
        "basketball court",
        "ground track field",
        "harbor",
        "bridge",
        "large vehicle",
        "small vehicle",
        "helicopter",
        "roundabout",
        "soccer ball field",
        "swimming pool",
        "container crane",
        "airport",
        "helipad",
    ]

    # Table layout: a single "image" media field plus the per-object fields
    object_schema = {
        "original_id": "str",
        "bbox": "bbox",
        "category": "str",
    }
    dataset_tables = super().create_tables(
        media_fields={"image": "image"},
        object_fields=object_schema,
    )

    # Category feature, declared with restricted=False
    category_values = FeatureValues(restricted=False, values=dota_categories)
    dataset_features = FeaturesValues(objects={"category": category_values})

    # Store the input directories, then hand everything to the base Importer
    self.input_dirs = input_dirs
    super().__init__(name, description, dataset_tables, splits, dataset_features)

import_rows()

Process dataset rows for import

Yields:

Type Description
Iterator

Processed rows

Source code in pixano/data/importers/dota_importer.py
def import_rows(self) -> Iterator:
    """Process dataset rows for import

    For each split, every ``*.png`` file found in
    ``input_dirs["image"] / split`` is paired with the annotation file of the
    same stem in ``input_dirs["objects"] / split / "hbb"``. One dictionary of
    rows ("main", "media" and "objects" tables) is yielded per image.

    Annotation lines are whitespace-separated; fields 0, 1, 4 and 5 are used
    as the xyxy box corners and field 8 as the category. Lines with fewer
    than 9 fields (blank lines, metadata headers) are skipped.

    Yields:
        Iterator: Processed rows
    """
    # Allow DOTA largest images. Loop-invariant, so set once up front.
    # NOTE(review): PILImage is presumably PIL.Image, whose MAX_IMAGE_PIXELS
    # caps the size of images that can be opened -- confirm against imports.
    PILImage.MAX_IMAGE_PIXELS = 806504000

    for split in self.info.splits:
        # Get images paths
        image_paths = glob.glob(str(self.input_dirs["image"] / split / "*.png"))
        image_paths = [Path(p) for p in sorted(image_paths, key=natural_key)]

        # Process rows
        for im_path in image_paths:
            # Load image annotations. Build the file name from the stem so
            # that only the extension changes: str.replace("png", "txt")
            # would also rewrite any "png" inside the base name.
            im_anns_file = (
                self.input_dirs["objects"] / split / "hbb" / f"{im_path.stem}.txt"
            )
            with open(im_anns_file, encoding="utf-8") as f:
                # Keep only lines long enough to hold the fields read below
                # (indices 0..8); blank or header lines would otherwise raise
                # IndexError / ValueError.
                im_anns = [
                    fields
                    for fields in (line.split() for line in f)
                    if len(fields) >= 9
                ]

            # Get image dimensions and thumbnail
            with PILImage.open(im_path) as im:
                im_w, im_h = im.size
                im_thumb = image_to_thumbnail(im)

            # Set image URI
            im_uri = f"image/{split}/{im_path.name}"

            # Set unique item id
            item_id = shortuuid.uuid()

            # Return rows
            rows = {
                "main": {
                    "db": [
                        {
                            "id": item_id,
                            "original_id": im_path.stem,
                            "views": ["image"],
                            "split": split,
                        }
                    ]
                },
                "media": {
                    "image": [
                        {
                            "id": item_id,
                            "image": Image(im_uri, None, im_thumb).to_dict(),
                        }
                    ]
                },
                "objects": {
                    "objects": [
                        {
                            "id": shortuuid.uuid(),
                            "item_id": item_id,
                            "view_id": "image",
                            # Box corners come from fields 0, 1, 4 and 5,
                            # then get normalized by the image dimensions
                            "bbox": BBox.from_xyxy(
                                [
                                    float(ann[0]),
                                    float(ann[1]),
                                    float(ann[4]),
                                    float(ann[5]),
                                ]
                            )
                            .normalize(im_h, im_w)
                            .to_dict(),
                            # File uses hyphenated names ("storage-tank");
                            # the declared categories use spaces
                            "category": str(ann[8]).replace("-", " "),
                        }
                        for ann in im_anns
                    ]
                },
            }

            yield rows