Skip to content

pixano.datasets.queries.table

TableQueryBuilder(table)

Builder class for querying LanceTables.

It supports the select, where, limit, offset, and order_by clauses:

- The select clause selects specific columns from the table. If it is not provided, all columns are selected.
- The where clause filters the rows of the table.
- The limit clause limits the number of rows returned.
- The offset clause skips the first n rows.
- The order_by clause sorts the rows of the table.

The query is built and executed when calling to_pandas(), to_list(), to_pydantic(), or to_polars().

Attributes:

Name Type Description
table LanceTable

The LanceTable to query.

Parameters:

Name Type Description Default
table LanceTable

The LanceTable to query.

required
Source code in pixano/datasets/queries/table.py
def __init__(self, table: LanceTable):
    """Creates a query builder bound to a LanceTable.

    Every clause starts out unset, and every entry of the call-tracking
    dictionary starts at False.

    Args:
        table: The LanceTable to query.

    Raises:
        ValueError: If table is not a LanceTable instance.
    """
    if not isinstance(table, LanceTable):
        raise ValueError("table must be a LanceTable.")

    self.table: LanceTable = table

    # Clause state; None / empty list means the clause has not been set.
    self._columns: list[str] | dict[str, str] | None = None
    self._where: str | None = None
    self._limit: int | None = None
    self._offset: int | None = None
    self._order_by: list[str] = []
    self._descending: list[bool] = []

    # One flag per builder step, all initially False.
    tracked_steps = ("select", "where", "limit", "offset", "order_by", "build")
    self._function_called: dict[str, bool] = dict.fromkeys(tracked_steps, False)

limit(limit)

Sets the limit for the query.

Parameters:

Name Type Description Default
limit int | None

The number of rows to return.

required

Returns:

Type Description
Self

The TableQueryBuilder instance.

Source code in pixano/datasets/queries/table.py
def limit(self, limit: int | None) -> Self:
    """Sets the limit for the query.

    Args:
        limit: The number of rows to return.

    Returns:
        The TableQueryBuilder instance.
    """
    self._check_called("limit")
    if limit is not None:
        if not isinstance(limit, int) or limit < 0:
            raise ValueError("limit must be None or a positive integer.")
    self._limit = limit
    return self

offset(offset)

Sets the offset for the query.

Parameters:

Name Type Description Default
offset int | None

The number of rows to skip.

required

Returns:

Type Description
Self

The TableQueryBuilder instance.

Source code in pixano/datasets/queries/table.py
def offset(self, offset: int | None) -> Self:
    """Sets the offset for the query.

    Args:
        offset: The number of rows to skip.

    Returns:
        The TableQueryBuilder instance.
    """
    self._check_called("offset")
    if offset is not None:
        if not isinstance(offset, int) or offset < 0:
            raise ValueError("offset must be None or a positive integer.")
    self._offset = offset
    return self

order_by(order_by, descending=False)

Sets the order_by clause for the query.

Parameters:

Name Type Description Default
order_by str | list[str]

The column(s) to sort by.

required
descending bool | list[bool]

Whether to sort in descending order.

False

Returns:

Type Description
Self

The TableQueryBuilder instance.

Source code in pixano/datasets/queries/table.py
def order_by(self, order_by: str | list[str], descending: bool | list[bool] = False) -> Self:
    """Sets the order_by clause for the query.

    Args:
        order_by: The column(s) to sort by.
        descending: Whether to sort in descending order.

    Returns:
        The TableQueryBuilder instance.
    """
    self._check_called("order_by")
    if isinstance(order_by, str):
        order_by = [order_by]
    elif not isinstance(order_by, list) or not all(isinstance(x, str) for x in order_by):
        raise ValueError("order_by must be a string or a list of strings.")
    if isinstance(descending, bool):
        descending = [descending] * len(order_by)
    elif (
        not isinstance(descending, list)
        or not all(isinstance(x, bool) for x in descending)
        or len(descending) != len(order_by)
    ):
        raise ValueError("descending must be a boolean or a list of booleans with the same length as order_by.")

    self._order_by = order_by
    self._descending = descending
    return self

select(columns)

Selects columns to include in the query.

Note

'id' is always included in the select clause.

Parameters:

Name Type Description Default
columns str | list[str] | dict[str, str]

The columns to include in the query. If a list, the columns are selected in the order they are provided. If a dictionary, the keys are the column names and the values are the aliases.

required
Source code in pixano/datasets/queries/table.py
def select(self, columns: str | list[str] | dict[str, str]) -> Self:
    """Selects columns to include in the query.

    Note:
        'id' is always included in the select clause.

    Args:
        columns: The columns to include in the query. If a list, the columns are selected in the order they are
            provided. If a dictionary, the keys are the column names and the values are the aliases.
    """
    self._check_called("select")
    if isinstance(columns, str):
        columns = [columns]

    if isinstance(columns, list) or isinstance(columns, dict):
        if isinstance(columns, list) and not all(isinstance(x, str) for x in columns):
            raise ValueError("columns must be a list of strings.")
        elif isinstance(columns, dict) and not all(
            isinstance(k, str) and isinstance(v, str) for k, v in columns.items()
        ):
            raise ValueError("columns must be a dictionary with string keys and values.")
        if isinstance(columns, list) and "id" not in columns:
            columns = ["id"] + columns
        elif isinstance(columns, dict) and "id" not in columns.values():
            columns["id"] = "id"
        self._columns = columns
    else:
        raise ValueError("columns must be a string, a list of string or a string mapping dictionary.")
    return self

to_list()

Builds the query and returns the result as a list of dictionaries.

Returns:

Type Description
list[dict[str, Any]]

The result as a list of dictionaries.

Source code in pixano/datasets/queries/table.py
def to_list(self) -> list[dict[str, Any]]:
    """Runs the query and returns the rows as a list of dictionaries.

    The output of `_execute()` is wrapped in a `_PixanoEmptyQueryBuilder`,
    whose `to_list()` produces the returned value.

    Returns:
        The result as a list of dictionaries.
    """
    executed = self._execute()
    return _PixanoEmptyQueryBuilder(executed).to_list()

to_pandas()

Builds the query and returns the result as a pandas DataFrame.

Returns:

Type Description
DataFrame

The result as a pandas DataFrame.

Source code in pixano/datasets/queries/table.py
def to_pandas(self) -> pd.DataFrame:
    """Runs the query and returns the rows as a pandas DataFrame.

    The output of `_execute()` is wrapped in a `_PixanoEmptyQueryBuilder`,
    whose `to_pandas()` produces the returned value.

    Returns:
        The result as a pandas DataFrame.
    """
    executed = self._execute()
    return _PixanoEmptyQueryBuilder(executed).to_pandas()

to_polars()

Builds the query and returns the result as a polars DataFrame.

Returns:

Type Description
DataFrame

The result as a polars DataFrame.

Source code in pixano/datasets/queries/table.py
def to_polars(self) -> pl.DataFrame:
    """Runs the query and returns the rows as a polars DataFrame.

    The output of `_execute()` is wrapped in a `_PixanoEmptyQueryBuilder`,
    whose `to_polars()` produces the returned value.

    Returns:
        The result as a polars DataFrame.
    """
    executed = self._execute()
    return _PixanoEmptyQueryBuilder(executed).to_polars()

to_pydantic(model)

Builds the query and returns the result as a list of Pydantic models.

Returns:

Type Description
list[T]

The result as a list of Pydantic models.

Source code in pixano/datasets/queries/table.py
def to_pydantic(self, model: type[T]) -> list[T]:
    """Runs the query and returns the rows as a list of Pydantic models.

    The output of `_execute()` is wrapped in a `_PixanoEmptyQueryBuilder`,
    whose `to_pydantic(model)` produces the returned value.

    Args:
        model: The model class forwarded to `to_pydantic`; the returned list
            is typed as instances of this class.

    Returns:
        The result as a list of Pydantic models.
    """
    executed = self._execute()
    return _PixanoEmptyQueryBuilder(executed).to_pydantic(model)

where(where)

Sets the where clause for the query.

Parameters:

Name Type Description Default
where str

The condition to filter the rows.

required

Returns:

Type Description
Self

The TableQueryBuilder instance.

Source code in pixano/datasets/queries/table.py
def where(self, where: str) -> Self:
    """Sets the where clause for the query.

    Args:
        where: The condition to filter the rows.

    Returns:
        The TableQueryBuilder instance.
    """
    self._check_called("where")
    if not isinstance(where, str):
        raise ValueError("where must be a string.")
    self._where = where
    return self