Skip to content

pixano.data.importers.image_importer

ImageImporter(name, description, input_dirs, splits=None, media_fields=None)

Bases: Importer

Importer class for image datasets

Attributes:

Name Type Description
info DatasetInfo

Dataset information

input_dirs dict[str, Path]

Dataset input directories

Parameters:

Name Type Description Default
name str

Dataset name

required
description str

Dataset description

required
input_dirs dict[str, Path]

Dataset input directories

required
splits list[str]

Dataset splits. Defaults to None for datasets with no subfolders for splits.

None
media_fields dict[str, str]

Dataset media fields, with field names as keys and field types as values. Defaults to None.

None
Source code in pixano/data/importers/image_importer.py
def __init__(
    self,
    name: str,
    description: str,
    input_dirs: dict[str, Path],
    splits: list[str] = None,
    media_fields: dict[str, str] = None,
):
    """Set up an importer for an image dataset

    Args:
        name (str): Dataset name
        description (str): Dataset description
        input_dirs (dict[str, Path]): Dataset input directories
        splits (list[str], optional): Dataset splits. When None, a single
            split named "dataset" is used (datasets with no subfolders
            for splits).
        media_fields (dict[str, str], optional): Dataset media fields, with
            field names as keys and field types as values. Defaults to None.
    """

    # Table definitions are built by the parent class from the media fields
    dataset_tables = super().create_tables(media_fields)

    # Without explicit splits, everything goes into one "dataset" split
    dataset_splits = ["dataset"] if splits is None else splits

    # Keep the input directories on the instance, then hand the rest to
    # the parent initializer
    self.input_dirs = input_dirs
    super().__init__(name, description, dataset_tables, dataset_splits)

import_rows()

Process dataset rows for import

Yields:

Type Description
Iterator

Processed rows

Source code in pixano/data/importers/image_importer.py
def import_rows(self) -> Iterator:
    """Process dataset rows for import

    For every split in ``self.info.splits``, collects the ``*.png``,
    ``*.jpg`` and ``*.jpeg`` files of the split's image directory, orders
    them with ``natural_key`` and yields one row dictionary per image.

    The split named "dataset" reads images directly from
    ``self.input_dirs["image"]``; any other split reads them from the
    subfolder of that directory bearing the split's name.

    Yields:
        Iterator: Processed rows, each a dict with a "main" entry (item id,
            original file name, views, split, label) and a "media" entry
            (item id and the image with its URI and thumbnail)
    """

    for split in self.info.splits:
        # The "dataset" split has no subfolder; other splits live in one
        if split == "dataset":
            split_dir = self.input_dirs["image"]
            uri_prefix = "image"
        else:
            split_dir = self.input_dirs["image"] / split
            uri_prefix = f"image/{split}"

        # Escape the directory so that glob metacharacters in the dataset
        # path or split name ("[", "]", "*", "?") are matched literally
        # rather than silently matching no files
        pattern_dir = Path(glob.escape(str(split_dir)))

        # Get images paths
        image_paths = []
        for ftype in ["*.png", "*.jpg", "*.jpeg"]:
            image_paths.extend(glob.glob(str(pattern_dir / ftype)))
        image_paths = [Path(p) for p in sorted(image_paths, key=natural_key)]

        # Process rows
        for im_path in image_paths:
            # Create image thumbnail from the raw file bytes
            im_thumb = image_to_thumbnail(im_path.read_bytes())

            # Set image URI
            im_uri = f"{uri_prefix}/{im_path.name}"

            # Set unique item id, shared by the "main" and "media" entries
            item_id = shortuuid.uuid()

            # Return rows
            rows = {
                "main": {
                    "db": [
                        {
                            "id": item_id,
                            "original_id": im_path.name,
                            "views": ["image"],
                            "split": split,
                            "label": "",
                        }
                    ]
                },
                "media": {
                    "image": [
                        {
                            "id": item_id,
                            "image": Image(im_uri, None, im_thumb).to_dict(),
                        }
                    ]
                },
            }
            yield rows