Skip to content

pixano.data.dataset.dataset_info

DatasetInfo(**data)

Bases: BaseModel

DatasetInfo

Attributes:

Name Type Description
id str

Dataset ID

name str

Dataset name

description str

Dataset description

estimated_size str

Dataset estimated size

num_elements int

Number of elements in dataset

splits list[str]

Dataset splits

tables dict[str, list[DatasetTable]]

Dataset tables

features_values Optional[FeaturesValues]

Existing values for each custom feature

preview str

Dataset preview

stats list[DatasetStat]

Dataset stats

Raises ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Source code in pydantic/main.py
def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    __tracebackhide__ = True
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if self is not validated_self:
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            category=None,
        )

from_json(json_fp, load_stats=False, load_thumbnail=False) staticmethod

Read DatasetInfo from JSON file

Parameters:

Name Type Description Default
json_fp Path | S3Path

JSON file path

required
load_stats bool

Load dataset stats. Defaults to False.

False
load_thumbnail bool

Load dataset thumbnail. Defaults to False.

False

Returns:

Type Description
DatasetInfo

DatasetInfo read from the JSON file

Source code in pixano/data/dataset/dataset_info.py
@staticmethod
def from_json(
    json_fp: Path | S3Path,
    load_stats: bool = False,
    load_thumbnail: bool = False,
) -> "DatasetInfo":
    """Build a DatasetInfo from a JSON file, optionally attaching stats and a preview

    Args:
        json_fp (Path | S3Path): Path of the JSON file to read
        load_stats (bool, optional): Also load "stats.json" from the folder
            containing json_fp, when that file exists. Defaults to False.
        load_thumbnail (bool, optional): Also set the preview from "preview.png"
            in the folder containing json_fp, when that file exists. Defaults to False.

    Returns:
        DatasetInfo: Model validated from the JSON file content
    """

    on_s3 = isinstance(json_fp, S3Path)

    # S3 paths are read through their own open(), anything else through the builtin
    handle = (
        json_fp.open(encoding="utf-8") if on_s3 else open(json_fp, encoding="utf-8")
    )
    with handle as json_file:
        raw_info = json.load(json_file)

    info = DatasetInfo.model_validate(raw_info)

    # Stats are read from a sibling file and only attached when that file is present
    if load_stats and (stats_path := json_fp.parent / "stats.json").is_file():
        info.stats = DatasetStat.from_json(stats_path)

    # Same for the thumbnail: a presigned URL on S3, otherwise the URL of an
    # Image created from the local file URI
    if load_thumbnail and (preview_path := json_fp.parent / "preview.png").is_file():
        if on_s3:
            info.preview = preview_path.get_presigned_url()
        else:
            info.preview = Image(uri=preview_path.absolute().as_uri()).url

    return info

load_directory(directory, load_thumbnail=False, load_stats=False) staticmethod

Load list of DatasetInfo from directory

Parameters:

Name Type Description Default
directory Path | S3Path

Directory to load

required
load_thumbnail bool

Load dataset thumbnail. Defaults to False.

False
load_stats bool

Load dataset stats. Defaults to False.

False

Returns:

Type Description
list[DatasetInfo]

List of DatasetInfo

Source code in pixano/data/dataset/dataset_info.py
@staticmethod
def load_directory(
    directory: Path | S3Path,
    load_thumbnail: bool = False,
    load_stats: bool = False,
) -> list["DatasetInfo"]:
    """Collect one DatasetInfo per "db.json" found in the subfolders of a directory

    Args:
        directory (Path | S3Path): Directory whose immediate subfolders are searched
        load_thumbnail (bool, optional): Forwarded to from_json. Defaults to False.
        load_stats (bool, optional): Forwarded to from_json. Defaults to False.

    Returns:
        list[DatasetInfo]: DatasetInfo objects, in the sorted order of their "db.json" paths
    """

    # One entry per "<subfolder>/db.json", visited in sorted path order
    return [
        DatasetInfo.from_json(
            db_file,
            load_thumbnail=load_thumbnail,
            load_stats=load_stats,
        )
        for db_file in sorted(directory.glob("*/db.json"))
    ]

save(save_dir)

Save DatasetInfo to a "db.json" JSON file

Parameters:

Name Type Description Default
save_dir Path | S3Path

Save directory

required
Source code in pixano/data/dataset/dataset_info.py
def save(self, save_dir: Path | S3Path):
    """Save DatasetInfo to a "db.json" file inside the given directory

    The content written is the JSON serialization of `self.model_dump()`.

    Args:
        save_dir (Path | S3Path): Save directory
    """
    # The file has to be opened explicitly in write mode: the S3 branch used to
    # call open() without a mode, so json.dump() was handed a handle opened in
    # the default read mode. Local and S3 paths both expose open(), so a single
    # code path covers them.
    with (save_dir / "db.json").open("w", encoding="utf-8") as f:
        json.dump(self.model_dump(), f)