Bases: ImageFolderBuilder
Builder for VQA datasets stored in a folder.
Source code in pixano/datasets/builders/folders/base.py
def __init__(
    self,
    media_dir: Path | str,
    library_dir: Path | str,
    info: DatasetInfo,
    dataset_path: Path | str,
    dataset_item: type[DatasetItem] | None = None,
    use_image_name_as_id: bool = False,
) -> None:
    """Initialize the folder-based builder.

    Args:
        media_dir: The global media directory.
        library_dir: The global directory for the Pixano datasets library.
        info: User information (name, description, ...) for the dataset.
            Its ``workspace`` attribute is overwritten with the builder's
            ``WORKSPACE_TYPE``.
        dataset_path: Path to the dataset, relative to ``media_dir``.
        dataset_item: The dataset item schema. When omitted, the builder's
            ``DEFAULT_SCHEMA`` is used if it is not None.
        use_image_name_as_id: If True, use the image base name as image id.
            Images MUST have unique base names.
            When no metadata file exists, it is also used as item id,
            otherwise 'item_#' is used.
            This allows image embeddings to be reused after a dataset
            overwrite.

    Raises:
        ValueError: If no schema is available, if
            ``media_dir / dataset_path`` is not an existing directory, if the
            schemas lack a View or an Entity, or if more than one View
            schema is defined.
    """
    info.workspace = self.WORKSPACE_TYPE

    # Fall back to the class-level schema only when the caller gave none.
    fallback_schema = self.DEFAULT_SCHEMA
    if dataset_item is None and fallback_schema is not None:
        dataset_item = fallback_schema
    if dataset_item is None:
        raise ValueError("A schema is required.")

    self.use_image_name_as_id = use_image_name_as_id
    self.media_dir = Path(media_dir)

    relative_path = Path(dataset_path)
    self.source_dir = self.media_dir / relative_path
    if not self.source_dir.is_dir():
        raise ValueError("A source path (media_dir / dataset_path) is required.")

    # The library folder name is the dataset path with its parts joined by "_".
    library_subdir = "_".join(relative_path.parts)
    super().__init__(
        target_dir=Path(library_dir) / library_subdir,
        dataset_item=dataset_item,
        info=info,
    )

    # Sort the schemas into views, entities and annotations.
    self.views_schema: dict[str, type[View]] = {}
    self.entities_schema: dict[str, type[Entity]] = {}
    self.annotations_schema: dict[str, type[Annotation]] = {}
    for schema_name, schema in self.schemas.items():
        if is_view(schema):
            self.views_schema[schema_name] = schema
        elif is_entity(schema):
            self.entities_schema[schema_name] = schema
        elif is_annotation(schema):
            self.annotations_schema[schema_name] = schema

    if not (self.views_schema and self.entities_schema):
        raise ValueError("At least one View and one Entity schema must be defined in the schemas argument.")

    # TODO - allow multiview in base FolderBuilder
    if len(self.views_schema) > 1:
        raise ValueError("Only one view schema is supported in folder based builders.")