`pixano.features.schemas.base_schema`

`BaseSchema(created_at=None, updated_at=None, **data)`

Bases: LanceModel

Base class for all schemas.

All schemas should inherit from this class; therefore, every element in the dataset contains an id.

Attributes:

Name	Type	Description
`id`	`str`	the id of the manipulated object.
`created_at`	`datetime`	the creation date of the object.
`updated_at`	`datetime`	the last modification date of the object.

Note

If the created_at and updated_at fields are not provided, they are set to the current date and time.

Raises ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Parameters:

Name	Type	Description	Default
`created_at`	`datetime \| None`	The creation date of the object.	`None`
`updated_at`	`datetime \| None`	The last modification date of the object.	`None`
`data`	`Any`	The data of the object validated by Pydantic.	`{}`

Source code in pixano/features/schemas/base_schema.py

def __init__(self, /, created_at: datetime | None = None, updated_at: datetime | None = None, **data: Any):
    """Build the model from keyword arguments after preparing its timestamps.

    Both timestamps are passed through `validate_and_init_create_at_and_update_at`; the
    values it returns are stored under the `created_at` and `updated_at` keys of `data`
    before the parent constructor parses and validates the whole payload.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.

    Args:
        created_at: The creation date of the object.
        updated_at: The last modification date of the object.
        data: The data of the object validated by Pydantic.
    """
    checked_created_at, checked_updated_at = validate_and_init_create_at_and_update_at(created_at, updated_at)
    data["created_at"] = checked_created_at
    data["updated_at"] = checked_updated_at
    super().__init__(**data)

`dataset` `property` `writable`

Get the dataset.

`table_name` `property` `writable`

Get the table name.

`deserialize(dataset_schema_json)` `staticmethod`

Deserialize the dataset schema.

Parameters:

Name	Type	Description	Default
`dataset_schema_json`	`dict[str, str \| dict[str, Any]]`	Serialized dataset schema.	required

Returns:

Type	Description
`type['BaseSchema']`	The dataset schema.

Source code in pixano/features/schemas/base_schema.py

@staticmethod
def deserialize(dataset_schema_json: dict[str, str | dict[str, Any]]) -> type["BaseSchema"]:
    """Deserialize the dataset schema.

    Every entry of `"fields"` becomes a required field of a new Pydantic model. The model
    is created on top of the class registered under `"schema"` when there is one, and on
    top of the class registered under `"base_schema"` otherwise.

    Args:
        dataset_schema_json: Serialized dataset schema. It must hold a `"fields"`
            dictionary and the `"schema"` and `"base_schema"` names as strings.

    Returns:
        The dataset schema.

    Raises:
        ValueError: If `"fields"` is not a dictionary, if a field type is not registered,
            or if `"schema"` or `"base_schema"` is not a string.
    """
    from .registry import _PIXANO_SCHEMA_REGISTRY, _SCHEMA_REGISTRY

    json_fields = dataset_schema_json["fields"]
    if not isinstance(json_fields, dict):
        raise ValueError("Fields should be a dictionary.")

    fields: dict[str, Any] = {}
    for key, value in json_fields.items():
        type_name = value["type"]
        if type_name in _TYPES_REGISTRY:
            type_ = _TYPES_REGISTRY[type_name]
        elif type_name == "FixedSizeList":  # LanceDB Vector
            dim = value["dim"]
            value_type = DESERIALIZE_PYARROW_DATATYPE[value["value_type"]]
            type_ = Vector(dim, value_type)
        else:
            raise ValueError(f"Type {type_name} not registered")
        if value["collection"]:
            type_ = list[type_]  # type: ignore[valid-type]
        # `...` marks the field as required in `create_model`.
        fields[key] = (type_, ...)

    schema, base_schema = dataset_schema_json["schema"], dataset_schema_json["base_schema"]

    if not isinstance(schema, str) or not isinstance(base_schema, str):
        raise ValueError("Schema and base schema should be strings.")

    # Prefer a class registered under the schema's own name; otherwise fall back to the base schema.
    if schema in _SCHEMA_REGISTRY:
        table_type = _SCHEMA_REGISTRY[schema]
    else:
        table_type = _PIXANO_SCHEMA_REGISTRY[base_schema]

    return create_model(schema, **fields, __base__=table_type)

`model_copy(*, update=None, deep=False)`

Returns a copy of the model.

Parameters:

Name	Type	Description	Default
`update`	`dict[str, Any] \| None`	Values to change/add in the new model.	`None`
`deep`	`bool`	Set to `True` to make a deep copy of the model.	`False`

Returns:

Type	Description
`Self`	New model instance.

Source code in pixano/features/schemas/base_schema.py

def model_copy(self, *, update: dict[str, Any] | None = None, deep: bool = False) -> Self:
    """Returns a copy of the model.

    The dataset reference is detached from this instance while Pydantic performs the copy,
    so that the dataset itself is not copied. It is then restored on this instance and
    assigned to the copy.

    Args:
        update: Values to change/add in the new model.
        deep: Set to `True` to make a deep copy of the model.

    Returns:
        New model instance.
    """
    # Wrap the pydantic `model_copy` method to prevent copying the dataset.
    dataset = self._dataset
    self._dataset = None
    try:
        copied = super().model_copy(update=update, deep=deep)
    finally:
        # Give the original instance its dataset back, even when the copy fails;
        # otherwise it would be left detached after every call.
        self._dataset = dataset

    copied.dataset = dataset
    return copied

`model_dump(exclude_timestamps=False, **kwargs)`

Dump the model to a dictionary.

Parameters:

Name	Type	Description	Default
`exclude_timestamps`	`bool`	Exclude timestamps "created_at" and "updated_at" from the model dump. Useful for comparing models without timestamps.	`False`
`kwargs`	`Any`	Arguments for pydantic `BaseModel.model_dump()`.	`{}`

Returns:

Type	Description
`dict[str, Any]`	The model dump.

Source code in pixano/features/schemas/base_schema.py

def model_dump(self, exclude_timestamps: bool = False, **kwargs: Any) -> dict[str, Any]:
    """Dump the model to a dictionary.

    Args:
        exclude_timestamps: When `True`, the "created_at" and "updated_at" entries are removed
            from the dump, which is handy for comparing models regardless of their timestamps.
        kwargs: Arguments forwarded to pydantic `BaseModel.model_dump()`.

    Returns:
        The model dump.
    """
    dumped = super().model_dump(**kwargs)
    if not exclude_timestamps:
        return dumped

    # Drop each timestamp key if present; a missing key is not an error.
    for timestamp_key in ("created_at", "updated_at"):
        dumped.pop(timestamp_key, None)
    return dumped

`resolve_ref(ref)`

resolve_ref(ref: 'ItemRef') -> 'Item'

resolve_ref(ref: 'ViewRef') -> 'View'

resolve_ref(ref: 'EmbeddingRef') -> 'Embedding'

resolve_ref(ref: 'EntityRef') -> 'Entity'

resolve_ref(ref: 'AnnotationRef') -> 'Annotation'

resolve_ref(ref: 'SourceRef') -> 'Source'

resolve_ref(ref: 'SchemaRef') -> 'BaseSchema'

Resolve a reference to a schema object in the dataset.

Parameters:

Name	Type	Description	Default
`ref`	`'SchemaRef' \| 'ItemRef' \| 'ViewRef' \| 'EmbeddingRef' \| 'EntityRef' \| 'AnnotationRef' \| 'SourceRef'`	The reference to resolve.	required

Returns:

Type	Description
`'BaseSchema' \| 'Item' \| 'View' \| 'Embedding' \| 'Entity' \| 'Annotation' \| 'Source'`	The resolved schema object.

Source code in pixano/features/schemas/base_schema.py

def resolve_ref(
    self, ref: "SchemaRef | ItemRef | ViewRef | EmbeddingRef | EntityRef | AnnotationRef | SourceRef"
) -> "BaseSchema | Item | View | Embedding | Entity | Annotation | Source":
    """Resolve a reference to a schema object in the dataset.

    The lookup is delegated to the `resolve_ref` method of the dataset attached to this
    object.

    Args:
        ref: The reference to resolve.

    Returns:
        The resolved schema object.
    """
    # Each union is written as one forward-reference string: `"A" | "B"` applies `|` to two
    # `str` objects and raises `TypeError` whenever the annotation is actually evaluated.
    return self.dataset.resolve_ref(ref)

`serialize()` `classmethod`

Serialize the table.

Source code in pixano/features/schemas/base_schema.py

@classmethod
def serialize(cls) -> dict[str, str | dict[str, Any]]:
    """Serialize the table.

    Returns:
        A dictionary with three keys: `"schema"` (the class name), `"base_schema"` (the
        name of the super type found in the Pixano schema registry) and `"fields"`, which
        maps each model field name to its type name and a `"collection"` flag. LanceDB
        vector fields additionally carry `"dim"` and `"value_type"`.

    Raises:
        ValueError: If the schema has no super type in the registry.
        NotImplementedError: If a generic field annotation is neither a list nor a tuple.
    """
    from .registry import _PIXANO_SCHEMA_REGISTRY

    # schema can be customized by the user
    # base_schema is the closest schema in the registry
    super_type = get_super_type_from_dict(cls, _PIXANO_SCHEMA_REGISTRY)
    if super_type is None:
        raise ValueError(f"Schema {cls.__name__} does not have a super type in the registry.")

    # Built once: the registry is not modified while the fields are walked.
    registered_types = tuple(_TYPES_REGISTRY.values())

    fields: dict[str, Any] = {}
    for field_name, field in cls.model_fields.items():
        annotation = field.annotation

        if isinstance(annotation, GenericAlias):
            if annotation.__origin__ not in (list, tuple):
                raise NotImplementedError("Should be a list or tuple.")
            # Registered and unregistered element types are serialized the same way,
            # so only the name of the first type argument is needed.
            fields[field_name] = {
                "type": annotation.__args__[0].__name__,
                "collection": True,
            }
            continue

        # A registered type is always serialized as a plain field, even if it also is a
        # fixed-size list; only unregistered fixed-size lists get the vector metadata.
        if not issubclass(annotation, registered_types) and issubclass(annotation, FixedSizeListMixin):
            fields[field_name] = {  # LanceDB Vector
                "type": annotation.__name__,
                "collection": False,
                "dim": annotation.dim(),
                "value_type": SERIALIZE_PYARROW_DATATYPE[annotation.value_arrow_type()],
            }
        else:
            fields[field_name] = {
                "type": annotation.__name__,
                "collection": False,
            }

    serialized: dict[str, str | dict[str, Any]] = {
        "schema": cls.__name__,
        "base_schema": super_type.__name__,
        "fields": fields,
    }
    return serialized

`is_base_schema(cls, strict=False)`

Check if a class is a BaseSchema or subclass of BaseSchema.

Source code in pixano/features/schemas/base_schema.py

def is_base_schema(cls: type, strict: bool = False) -> bool:
    """Tell whether `cls` is `BaseSchema` or one of its subclasses.

    Args:
        cls: The class to check.
        strict: Passed unchanged as the third argument of `issubclass_strict`.

    Returns:
        The result of `issubclass_strict(cls, BaseSchema, strict)`.
    """
    matches_base_schema = issubclass_strict(cls, BaseSchema, strict)
    return matches_base_schema

pixano.features.schemas.base_schema

BaseSchema(created_at=None, updated_at=None, **data)

dataset property writable

table_name property writable

deserialize(dataset_schema_json) staticmethod

model_copy(*, update=None, deep=False)

model_dump(exclude_timestamps=False, **kwargs)

resolve_ref(ref)

serialize() classmethod

is_base_schema(cls, strict=False)

`pixano.features.schemas.base_schema`

`BaseSchema(created_at=None, updated_at=None, **data)`

`dataset` `property` `writable`

`table_name` `property` `writable`

`deserialize(dataset_schema_json)` `staticmethod`

`model_copy(*, update=None, deep=False)`

`model_dump(exclude_timestamps=False, **kwargs)`

`resolve_ref(ref)`

`serialize()` `classmethod`

`is_base_schema(cls, strict=False)`