Skip to content

pixano.datasets.builders.folders.vqa

VQAFolderBuilder(media_dir, library_dir, info, dataset_path, dataset_item=None, use_image_name_as_id=False)

Bases: ImageFolderBuilder

Builder for VQA (Visual Question Answering) datasets stored in a folder.

Source code in pixano/datasets/builders/folders/base.py
def __init__(
    self,
    media_dir: Path | str,
    library_dir: Path | str,
    info: DatasetInfo,
    dataset_path: Path | str,
    dataset_item: type[DatasetItem] | None = None,
    use_image_name_as_id: bool = False,
) -> None:
    """Set up a folder-based builder from a media directory and a dataset schema.

    Args:
        media_dir: Root directory holding the media files.
        library_dir: Root directory of the Pixano datasets library.
        info: User-provided dataset information (name, description, ...).
            Its `workspace` attribute is overwritten with the builder's
            `WORKSPACE_TYPE`.
        dataset_path: Location of the dataset, relative to `media_dir`.
        dataset_item: Schema of a dataset item. When omitted, the builder's
            `DEFAULT_SCHEMA` is used if it is set.
        use_image_name_as_id: When True, the image base name serves as the
            image id, so images MUST have unique base names. If no metadata
            file exists, the base name is also used as the item id;
            otherwise 'item_#' is used. This makes it possible to reuse
            image embeddings after the dataset is overwritten.

    Raises:
        ValueError: If no schema is available, if `media_dir / dataset_path`
            is not an existing directory, if the schemas lack a View or an
            Entity, or if they declare more than one View.
    """
    # The workspace is dictated by the builder class, not by the caller.
    info.workspace = self.WORKSPACE_TYPE

    # Fall back to the class-level schema only when none was supplied.
    fallback_schema = self.DEFAULT_SCHEMA
    if dataset_item is None and fallback_schema is not None:
        dataset_item = fallback_schema
    if dataset_item is None:
        raise ValueError("A schema is required.")

    self.use_image_name_as_id = use_image_name_as_id

    self.media_dir = Path(media_dir)
    relative_path = Path(dataset_path)
    self.source_dir = self.media_dir / relative_path
    if not self.source_dir.is_dir():
        raise ValueError("A source path (media_dir / dataset_path) is required.")

    # The dataset's folder in the library is named after the components of
    # its relative path, joined with underscores.
    super().__init__(
        target_dir=Path(library_dir) / "_".join(relative_path.parts),
        dataset_item=dataset_item,
        info=info,
    )

    self.views_schema: dict[str, type[View]] = {}
    self.entities_schema: dict[str, type[Entity]] = {}
    self.annotations_schema: dict[str, type[Annotation]] = {}

    # Sort each schema into its category; schemas matching none of the
    # three predicates are ignored.
    # NOTE(review): `self.schemas` is presumably populated by the parent
    # initializer called above - confirm.
    for schema_name, schema_type in self.schemas.items():
        if is_view(schema_type):
            self.views_schema[schema_name] = schema_type
        elif is_entity(schema_type):
            self.entities_schema[schema_name] = schema_type
        elif is_annotation(schema_type):
            self.annotations_schema[schema_name] = schema_type

    if not (self.views_schema and self.entities_schema):
        raise ValueError("At least one View and one Entity schema must be defined in the schemas argument.")

    # TODO - allow multiview in base FolderBuilder
    if len(self.views_schema) > 1:
        raise ValueError("Only one view schema is supported in folder based builders.")