Skip to content

pixano.datasets.exporters.default_json_dataset_exporter

DefaultJSONDatasetExporter(dataset, export_dir, overwrite=False)

Bases: DatasetExporter

Default JSON dataset exporter.

Source code in pixano/datasets/exporters/dataset_exporter.py
def __init__(self, dataset: Dataset, export_dir: str | Path, overwrite: bool = False):
    """Bind the exporter to a dataset and an output directory.

    Args:
        dataset: The dataset to be exported.
        export_dir: The directory where the exported files will be saved.
            It is stored as a `Path`, whether given as a string or a path.
        overwrite: Whether to overwrite existing directory.
    """
    target_dir = Path(export_dir)

    self.dataset = dataset
    self.export_dir = target_dir
    self._overwrite = overwrite

export_dataset_item(export_data, dataset_item)

Store the dataset item in the export_data dictionary.

Parameters:

Name Type Description Default
export_data dict[str, Any]

A dictionary containing the data to be exported.

required
dataset_item DatasetItem

The dataset item to be exported.

required

Returns:

Type Description
dict[str, Any]

A dictionary containing the data to be exported.

Source code in pixano/datasets/exporters/default_json_dataset_exporter.py
def export_dataset_item(self, export_data: dict[str, Any], dataset_item: DatasetItem) -> dict[str, Any]:
    """Append the serialized schemas of one dataset item to `export_data`.

    The item is converted to its schema objects according to the dataset
    schema. Schemas belonging to the item group are appended to a flat list,
    while schemas of any other group are appended to the list stored under
    their schema name inside that group's dictionary.

    Args:
        export_data: A dictionary containing the data to be exported. It is
            updated in place.
        dataset_item: The dataset item to be exported.

    Returns:
        The same `export_data` dictionary, after the update.
    """
    data: dict[str, BaseSchema | list[BaseSchema] | None] = dataset_item.to_schemas_data(self.dataset.schema)
    for schema_name, schema_data in data.items():
        # Nothing to export for a missing or empty entry.
        if not schema_data:
            continue

        # Normalize to a list so single schemas and lists share one code path.
        records = schema_data if isinstance(schema_data, list) else [schema_data]

        # The group of the first record decides where the whole batch goes.
        group = schema_to_group(records[0])
        destination = export_data[group_to_str(group, plural=True)]
        if group != SchemaGroup.ITEM:
            # Non-item groups keep one list per schema name.
            destination = destination[schema_name]

        destination.extend([record.model_dump(exclude_timestamps=True) for record in records])
    return export_data

initialize_export_data(info, sources)

Initialize the dictionary or list of dictionaries to be exported.

Parameters:

Name Type Description Default
info DatasetInfo

The dataset information.

required
sources list[Source]

The list of sources.

required

Returns:

Type Description
dict[str, Any]

A dictionary containing the data to be exported.

Source code in pixano/datasets/exporters/default_json_dataset_exporter.py
def initialize_export_data(self, info: DatasetInfo, sources: list[Source]) -> dict[str, Any]:
    """Build the initial, mostly empty structure that export data is stored in.

    The result holds the dumped dataset information under `"info"`, one entry
    per schema group of the dataset (the embedding group is skipped), and the
    dumped sources. The item group starts as an empty list; every other group
    starts as a dictionary mapping each of its schema names to an empty list.

    Args:
        info: The dataset information.
        sources: The list of sources.

    Returns:
        A dictionary containing the data to be exported.
    """
    export_data: dict[str, Any] = {"info": info.model_dump()}

    for group, schemas in self.dataset.schema.groups.items():
        if group == SchemaGroup.EMBEDDING:
            continue
        group_key = group_to_str(group, plural=True)
        export_data[group_key] = [] if group == SchemaGroup.ITEM else {schema: [] for schema in schemas}

    # Sources are written last, so they replace any placeholder created above.
    dumped_sources = [source.model_dump(exclude_timestamps=True) for source in sources]
    export_data[group_to_str(SchemaGroup.SOURCE, plural=True)] = dumped_sources
    return export_data

save_data(export_data, split, file_name, file_num)

Save data to the specified directory.

The saved directory has the following structure

export_dir/{split}_{file_name}_0.json /... /{split}_{file_name}_{file_num}.json /... /{split}_{file_name}_n.json

Parameters:

Name Type Description Default
export_data dict[str, Any]

The dictionary containing the data to be saved.

required
split str

The split of the dataset item being saved.

required
file_name str

The name of the file to save the data in.

required
file_num int

The number of the file to save the data in.

required
Source code in pixano/datasets/exporters/default_json_dataset_exporter.py
def save_data(self, export_data: dict[str, Any], split: str, file_name: str, file_num: int) -> None:
    """Write one chunk of export data to a JSON file in the export directory.

    Files are laid out as follows:
        export_dir/{split}_{file_name}_0.json
                  /...
                  /{split}_{file_name}_{file_num}.json
                  /...
                  /{split}_{file_name}_n.json

    Args:
        export_data: The dictionary containing the data to be saved.
        split: The split of the dataset item being saved.
        file_name: The name of the file to save the data in.
        file_num: The number of the file to save the data in.
    """
    destination = self.export_dir / f"{split}_{file_name}_{file_num}.json"
    # Convert the data to JSON-compatible values before serializing it.
    serialized = json.dumps(jsonable_encoder(export_data), indent=4)
    destination.write_text(serialized, encoding="utf-8")