Skip to content

pixano.datasets.dataset_info

DatasetInfo(**data)

Bases: BaseModel

Information about a dataset.

Attributes:

Name Type Description
id str

Dataset ID. Must be unique.

name str

Dataset name.

description str

Dataset description.

estimated_size str

Dataset estimated size.

preview str

Path to a preview thumbnail.

Source code in pydantic/main.py
def __init__(self, /, **data: Any) -> None:
    """Build the model by validating the keyword arguments it was given.

    The keyword arguments are passed as a dict to the model's validator, with
    this instance supplied as `self_instance`. A warning is emitted when the
    validator returns an object other than this instance.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is positional-only so that `self` remains usable as a field name.
    """
    # Setting `__tracebackhide__` asks pytest and similar tools to leave this frame out of tracebacks
    __tracebackhide__ = True
    outcome = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if outcome is self:
        # Expected case: the validator handed back this very instance.
        return
    message = (
        'A custom validator is returning a value other than `self`.\n'
        "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
        'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.'
    )
    # stacklevel=2 attributes the warning to the code that instantiated the model.
    warnings.warn(message, stacklevel=2)

from_json(json_fp) staticmethod

Read DatasetInfo from JSON file.

Parameters:

Name Type Description Default
json_fp Path

JSON file path.

required

Returns:

Type Description
'DatasetInfo'

The dataset info object.

Source code in pixano/datasets/dataset_info.py
@staticmethod
def from_json(
    json_fp: Path,
) -> "DatasetInfo":
    """Build a DatasetInfo from the contents of a JSON file.

    Args:
        json_fp: Path of the JSON file to read; it is decoded as UTF-8.

    Returns:
        The DatasetInfo validated from the parsed JSON content.
    """
    raw_text = json_fp.read_text(encoding="utf-8")
    parsed = json.loads(raw_text)
    return DatasetInfo.model_validate(parsed)

load_directory(directory, return_path=False) staticmethod

load_directory(directory: Path, return_path: Literal[False] = False) -> list['DatasetInfo']
load_directory(directory: Path, return_path: Literal[True]) -> list[tuple['DatasetInfo', Path]]

Load list of DatasetInfo from directory.

Parameters:

Name Type Description Default
directory Path

Directory to load.

required
return_path bool

Whether to also return the path of each dataset.

False

Returns:

Type Description
list[tuple['DatasetInfo', Path]] | list['DatasetInfo']

The list of DatasetInfo objects, each paired with its dataset path when return_path is True.

Source code in pixano/datasets/dataset_info.py
@staticmethod
def load_directory(
    directory: Path,
    return_path: bool = False,
) -> list[tuple["DatasetInfo", Path]] | list["DatasetInfo"]:
    """Collect the DatasetInfo of every dataset found under a directory.

    Every immediate sub-directory containing an ``info.json`` file is loaded,
    in sorted path order. The preview of each dataset is filled in from
    ``previews/dataset_preview.jpg`` inside the dataset directory; if opening
    it fails for any reason the preview is set to an empty string.

    Args:
        directory: Directory whose sub-directories are scanned.
        return_path: When true, each entry is a ``(info, dataset_dir)`` tuple
            instead of the bare DatasetInfo.

    Returns:
        The loaded DatasetInfo objects, each paired with its dataset directory
        when ``return_path`` is true.

    Raises:
        FileNotFoundError: If no ``info.json`` file is found.
    """
    datasets: list[DatasetInfo] | list[tuple[DatasetInfo, Path]] = []

    for info_file in sorted(directory.glob("*/info.json")):
        dataset_dir = info_file.parent
        dataset_info: DatasetInfo = DatasetInfo.from_json(info_file)
        preview_url = str(dataset_dir / "previews/dataset_preview.jpg")
        try:
            # TODO choose correct preview name / path / extension
            dataset_info.preview = Image.open_url(preview_url, Path("/"))
        except Exception:  # TODO: specify exception URL and Value
            # Best effort: a dataset without a usable preview is still listed.
            dataset_info.preview = ""
        entry = (dataset_info, dataset_dir) if return_path else dataset_info
        datasets.append(entry)  #  type: ignore[arg-type]

    if not datasets:
        raise FileNotFoundError(f"No dataset found in {directory}.")
    return datasets

load_id(id, directory, return_path=False) staticmethod

load_id(id: str, directory: Path, return_path: Literal[False] = False) -> 'DatasetInfo'
load_id(id: str, directory: Path, return_path: Literal[True] = True) -> tuple['DatasetInfo', Path]

Load a specific DatasetInfo from directory.

Parameters:

Name Type Description Default
id str

The ID of the dataset to load.

required
directory Path

Directory to load.

required
return_path bool

Whether to also return the path of the dataset.

False

Returns:

Type Description
tuple['DatasetInfo', Path] | 'DatasetInfo'

The DatasetInfo, or a tuple of the DatasetInfo and its dataset path when return_path is True.

Source code in pixano/datasets/dataset_info.py
@staticmethod
def load_id(id: str, directory: Path, return_path: bool = False) -> tuple["DatasetInfo", Path] | "DatasetInfo":
    """Find the dataset with the given ID and return its DatasetInfo.

    Each ``info.json`` file found one level below ``directory`` is loaded
    until one whose ``id`` matches is found. Its preview is filled in from
    ``previews/dataset_preview.jpg`` inside the dataset directory; if opening
    it raises ``ValueError`` the preview is set to an empty string.

    Args:
        id: ID of the dataset to look for.
        directory: Directory whose sub-directories are scanned.
        return_path: When true, return a ``(info, dataset_dir)`` tuple instead
            of the bare DatasetInfo.

    Returns:
        The matching DatasetInfo, paired with its dataset directory when
        ``return_path`` is true.

    Raises:
        FileNotFoundError: If no scanned dataset has the requested ID.
    """
    for info_file in directory.glob("*/info.json"):
        candidate = DatasetInfo.from_json(info_file)
        if candidate.id != id:
            continue

        dataset_dir = info_file.parent
        try:
            candidate.preview = Image.open_url(
                str(dataset_dir / "previews/dataset_preview.jpg"),
                dataset_dir / "media",
            )  # TODO choose correct preview name / path / extension
        except ValueError:
            candidate.preview = ""

        if return_path:
            return (candidate, dataset_dir)
        return candidate

    raise FileNotFoundError(f"No dataset found with ID {id}")

to_json(json_fp)

Writes the DatasetInfo object to a JSON file.

Parameters:

Name Type Description Default
json_fp Path

The path to the file where the DatasetInfo object will be written.

required
Source code in pixano/datasets/dataset_info.py
def to_json(self, json_fp: Path) -> None:
    """Serialize this DatasetInfo to a JSON file.

    Args:
        json_fp: Destination file. The dumped model is written as UTF-8 JSON
            indented with four spaces.
    """
    payload = self.model_dump()
    serialized = json.dumps(payload, indent=4)
    json_fp.write_text(serialized, encoding="utf-8")