Skip to content

pixano.features.schemas.embeddings.embedding

Embedding(created_at=None, updated_at=None, **data)

Bases: BaseSchema, ABC

Embeddings are used to define an embedding vector for an item in a dataset.

Attributes:

Name Type Description
item_ref ItemRef

Reference to the embedding's item.

vector Any

The embedding vector that should be defined by subclasses.

Source code in pixano/features/schemas/base_schema.py
def __init__(self, /, created_at: datetime | None = None, updated_at: datetime | None = None, **data: Any):
    """Initialize the model from keyword arguments after resolving its timestamps.

    The two timestamps are passed through `validate_and_init_create_at_and_update_at`, written into `data` under
    the `"created_at"` and `"updated_at"` keys, and the resulting mapping is forwarded to the parent initializer.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.

    Args:
        created_at: The creation date of the object.
        updated_at: The last modification date of the object.
        data: The data of the object validated by Pydantic.
    """
    # The helper returns the (created_at, updated_at) pair to store on the model.
    timestamps = validate_and_init_create_at_and_update_at(created_at, updated_at)
    data["created_at"], data["updated_at"] = timestamps
    super().__init__(**data)

item property

Get the embedding's item.

to_arrow_schema(remove_vector=False, remove_metadata=False) classmethod

Get the pyarrow schema of an Embedding.

This function allows removing the vector field and the metadata from the schema, which can be useful for adding data with auto-vectorization.

Parameters:

Name Type Description Default
remove_vector bool

Remove the vector field.

False
remove_metadata bool

Remove the metadata.

False

Returns:

Type Description
Schema

The pyarrow schema.

Source code in pixano/features/schemas/embeddings/embedding.py
@classmethod
def to_arrow_schema(
    cls,
    remove_vector: bool = False,
    remove_metadata: bool = False,
) -> pa.Schema:
    """Return the pyarrow schema of an `Embedding`, optionally stripped down.

    The schema is taken from the parent class. The `"vector"` field and/or the schema metadata can then be
    dropped, which can be useful for adding data with auto-vectorization.

    Args:
        remove_vector: Remove the vector field.
        remove_metadata: Remove the metadata.

    Returns:
        The pyarrow schema.
    """
    schema = super().to_arrow_schema()
    if remove_vector:
        # Look the field up by name, then drop it by position.
        vector_index = schema.get_field_index("vector")
        schema = schema.remove(vector_index)
    return schema.remove_metadata() if remove_metadata else schema

ViewEmbedding(created_at=None, updated_at=None, **data)

Bases: Embedding, ABC

ViewEmbeddings are used to define an embedding vector for a view in a dataset.

Attributes:

Name Type Description
view_ref ViewRef

Reference to the embedding's view.

Source code in pixano/features/schemas/base_schema.py
def __init__(self, /, created_at: datetime | None = None, updated_at: datetime | None = None, **data: Any):
    """Initialize the model from keyword arguments after resolving its timestamps.

    The two timestamps are passed through `validate_and_init_create_at_and_update_at`, written into `data` under
    the `"created_at"` and `"updated_at"` keys, and the resulting mapping is forwarded to the parent initializer.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.

    Args:
        created_at: The creation date of the object.
        updated_at: The last modification date of the object.
        data: The data of the object validated by Pydantic.
    """
    # The helper returns the (created_at, updated_at) pair to store on the model.
    timestamps = validate_and_init_create_at_and_update_at(created_at, updated_at)
    data["created_at"], data["updated_at"] = timestamps
    super().__init__(**data)

view property

Get the embedding's view.

create_schema(embedding_fn, table_name, dataset, **embedding_function_kwargs) classmethod

Create a ViewEmbedding schema.

Parameters:

Name Type Description Default
embedding_fn str

The embedding function.

required
table_name str

The name of the table containing the schema.

required
dataset Dataset

The dataset to which the schema belongs.

required
embedding_function_kwargs Any

The keyword arguments for creating the embedding function.

{}

Returns:

Type Description
type[ViewEmbedding]

The ViewEmbedding schema.

Source code in pixano/features/schemas/embeddings/embedding.py
@classmethod
def create_schema(
    cls,
    embedding_fn: str,
    table_name: str,
    dataset: "Dataset",
    **embedding_function_kwargs: Any,
) -> type["ViewEmbedding"]:
    """Build a `ViewEmbedding` schema bound to an embedding function.

    The embedding function is looked up in the registry under a name derived from the dataset, the table name
    and `embedding_fn`. If that name is not registered yet, a view embedding function is created from the
    function registered as `embedding_fn`. The function is then instantiated with `embedding_function_kwargs`
    and used to declare the `vector` and `view_ref` fields of the returned model.

    Args:
        embedding_fn: The embedding function.
        table_name: The name of the table containing the schema.
        dataset: The dataset to which the schema belongs.
        embedding_function_kwargs: The keyword arguments for creating the embedding function.

    Returns:
        The `ViewEmbedding` schema.

    Raises:
        TypeError: If `embedding_fn` is not a string.
    """
    registry = get_registry()
    if not isinstance(embedding_fn, str):
        raise TypeError(f"{embedding_fn} should be a string")

    registered_name = _to_pixano_name(dataset, table_name, embedding_fn)
    if registered_name in registry._functions:
        # Already registered under the derived name: reuse it.
        function_type = registry.get(registered_name)
    else:
        base_function_type = registry.get(embedding_fn)
        function_type = create_view_embedding_function(base_function_type, registered_name, dataset)

    embedding_function = function_type.create(**embedding_function_kwargs)

    # Each field is declared as an (annotation, field definition) pair.
    vector_field = (Vector(embedding_function.ndims()), embedding_function.VectorField())
    view_ref_field = (ViewRef, embedding_function.SourceField())
    return create_model(
        "ViewEmbedding",
        __base__=ViewEmbedding,
        vector=vector_field,
        view_ref=view_ref_field,
    )

get_embedding_fn_from_table(dataset, table_name, metadata) staticmethod

Get the embedding function from a table.

Parameters:

Name Type Description Default
dataset Dataset

The dataset containing the table.

required
table_name str

The name of the table containing the embedding function.

required
metadata dict

The pyarrow metadata of the table.

required

Returns:

Type Description
EmbeddingFunction

The embedding function.

Source code in pixano/features/schemas/embeddings/embedding.py
@staticmethod
def get_embedding_fn_from_table(dataset: "Dataset", table_name: str, metadata: dict) -> EmbeddingFunction:
    """Retrieve the embedding function recorded in a table's metadata.

    The `b"embedding_functions"` metadata entry is decoded as UTF-8 JSON and the `"name"` of its first element
    is looked up in the registry. When that name is not registered yet, a view embedding function is created
    for it before the lookup.

    Args:
        dataset: The dataset containing the table.
        table_name: The name of the table containing the embedding function.
        metadata: The pyarrow metadata of the table.

    Returns:
        The embedding function.

    Raises:
        ValueError: If the metadata lists more than one embedding function.
    """
    registry = get_registry()

    function_configs = json.loads(metadata[b"embedding_functions"].decode("utf-8"))
    if len(function_configs) > 1:
        raise ValueError("Only one embedding function per table is supported")

    pixano_name = function_configs[0]["name"]
    if pixano_name in registry._functions:
        return registry.get(pixano_name)

    # Not registered yet: derive the base function's name and register a view-aware variant first.
    base_name = _from_pixano_name(dataset, table_name, pixano_name)
    create_view_embedding_function(registry._functions[base_name], pixano_name, dataset)
    return registry.get(pixano_name)

create_view_embedding_function(type_embedding_function, name, dataset)

Create a ViewEmbeddingFunction based on an EmbeddingFunction.

Source code in pixano/features/schemas/embeddings/embedding.py
def create_view_embedding_function(
    type_embedding_function: type[EmbeddingFunction], name: str, dataset: "Dataset"
) -> type[EmbeddingFunction]:
    """Create a `ViewEmbeddingFunction` based on an
    [EmbeddingFunction][lancedb.embeddings.base.EmbeddingFunction].

    The returned class subclasses `type_embedding_function`, is registered under `name`, and resolves view
    references against `dataset` before delegating to the parent's embedding computation.

    Args:
        type_embedding_function: The embedding function class to subclass.
        name: The name passed to `register` for the new class.
        dataset: The dataset used to resolve view references and locate media.

    Returns:
        The registered `ViewEmbeddingFunction` class.
    """

    @register(name)
    class ViewEmbeddingFunction(type_embedding_function):
        """A `ViewEmbeddingFunction` based on an [EmbeddingFunction][lancedb.embeddings.base.EmbeddingFunction]."""

        def _open_views(self, views: list[Any]) -> list[Any]:
            """Open every view from the dataset's media directory as an image."""
            return [item.open(dataset.media_dir, "image") for item in views]

        def compute_source_embeddings(self, view_refs: pa.Table, *args, **kwargs) -> list:
            """Resolve the view references, open the views and embed them with the parent implementation.

            Raises:
                ValueError: If the first resolved view is neither an image nor a sequence frame.
            """
            views = [dataset.resolve_ref(ViewRef(**row)) for row in view_refs.to_pylist()]
            # Only the first view's type is inspected to decide whether the batch is supported.
            view_type = type(views[0])
            if not (is_image(view_type) or is_sequence_frame(view_type)):
                raise ValueError(f"View type {view_type} not supported for embedding.")
            images = cast(list[Image], views)
            opened = self._open_views(views=images)
            return super().compute_source_embeddings(opened, *args, **kwargs)

    return ViewEmbeddingFunction

is_embedding(cls, strict=False)

Check if a class is an Embedding or subclass of Embedding.

Source code in pixano/features/schemas/embeddings/embedding.py
def is_embedding(cls: type, strict: bool = False) -> bool:
    """Tell whether a class is `Embedding` or one of its subclasses.

    Args:
        cls: The class to check.
        strict: Forwarded unchanged to `issubclass_strict`.

    Returns:
        The result of `issubclass_strict` for `cls` against `Embedding`.
    """
    result = issubclass_strict(cls, Embedding, strict)
    return result

is_view_embedding(cls, strict=False)

Check if a class is a ViewEmbedding or subclass of ViewEmbedding.

Source code in pixano/features/schemas/embeddings/embedding.py
def is_view_embedding(cls: type, strict: bool = False) -> bool:
    """Tell whether a class is `ViewEmbedding` or one of its subclasses.

    Args:
        cls: The class to check.
        strict: Forwarded unchanged to `issubclass_strict`.

    Returns:
        The result of `issubclass_strict` for `cls` against `ViewEmbedding`.
    """
    result = issubclass_strict(cls, ViewEmbedding, strict)
    return result