Bases: FolderBaseBuilder
Builder for image datasets stored in a folder.
Source code in pixano/datasets/builders/folders/base.py
def __init__(
    self,
    source_dir: Path | str,
    target_dir: Path | str,
    info: DatasetInfo,
    dataset_item: type[DatasetItem] | None = None,
    url_prefix: Path | str | None = None,
) -> None:
    """Initialize the `FolderBaseBuilder`.

    Args:
        source_dir: The source directory for the dataset.
        target_dir: The target directory for the dataset.
        info: User information (name, description, ...) for the dataset. Its
            `workspace` attribute is overwritten with the builder's
            `WORKSPACE_TYPE`.
        dataset_item: The dataset item schema. When omitted, the builder's
            `DEFAULT_SCHEMA` is used if it is not None.
        url_prefix: The path to build relative URLs for the views. Useful to
            build dataset libraries to pass the relative path from the media
            directory. When omitted, `Path(".")` is used.

    Raises:
        ValueError: If neither `dataset_item` nor `DEFAULT_SCHEMA` provides a
            schema, if the schemas contain no view or no entity, or if they
            contain more than one view.
    """
    # The workspace type is dictated by the builder, not by the caller.
    info.workspace = self.WORKSPACE_TYPE

    # Fall back on the builder's default schema when none is given.
    fallback_schema = self.DEFAULT_SCHEMA
    if dataset_item is None and fallback_schema is not None:
        dataset_item = fallback_schema
    if dataset_item is None:
        raise ValueError("A schema is required.")

    super().__init__(target_dir=target_dir, dataset_item=dataset_item, info=info)

    self.source_dir = Path(source_dir)
    self.url_prefix = Path(".") if url_prefix is None else Path(url_prefix)

    # Sort the dataset schemas into views, entities and annotations.
    # Schemas matching none of the three predicates are left out.
    self.views_schema: dict[str, type[View]] = {}
    self.entities_schema: dict[str, type[Entity]] = {}
    self.annotations_schema: dict[str, type[Annotation]] = {}
    for schema_name, schema in self.schemas.items():
        if is_view(schema):
            self.views_schema[schema_name] = schema
        elif is_entity(schema):
            self.entities_schema[schema_name] = schema
        elif is_annotation(schema):
            self.annotations_schema[schema_name] = schema

    if not (self.views_schema and self.entities_schema):
        raise ValueError("At least one View and one Entity schema must be defined in the schemas argument.")

    # Kept for compatibility with the current ImageFolderBuilder, which allows
    # only one view and one entity.
    # TODO - allow multiview and multi entities in base FolderBuilder
    # Note: VQA technically also allows only one view, so the ValueError stays
    # for now.
    if len(self.views_schema) != 1:
        raise ValueError("Only one view schema is supported in folder based builders.")
    self.view_name, self.view_schema = next(iter(self.views_schema.items()))

    # `entity_name` / `entity_schema` are only set when there is exactly one
    # entity schema; no error is raised otherwise.
    if len(self.entities_schema) == 1:
        self.entity_name, self.entity_schema = next(iter(self.entities_schema.items()))