Skip to content

pixano_inference.models.vlm

VLM (Vision-Language Model) base class and I/O types.

UsageInfo(**data)

Bases: BaseModel

Usage metadata for generation.

Attributes:

Name Type Description
prompt_tokens int

Number of tokens in the prompt.

completion_tokens int

Number of tokens in the completion.

total_tokens int

Total number of tokens.

Source code in pydantic/main.py (constructor inherited from pydantic's BaseModel)
def __init__(self, /, **data: Any) -> None:
    """Build the model by parsing and validating the supplied keyword arguments.

    A [`ValidationError`][pydantic_core.ValidationError] is raised when the input data
    cannot be validated to form a valid model.

    `self` is positional-only so that a field may itself be named `self`.
    """
    # pytest and some other tools leave this function out of tracebacks when this local is set
    __tracebackhide__ = True
    result = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if result is self:
        # The validator handed back this same instance: nothing to report.
        return
    # Warn directly from this frame so `stacklevel=2` keeps pointing at the caller.
    warnings.warn(
        'A custom validator is returning a value other than `self`.\n'
        "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
        'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
        stacklevel=2,
    )

VLMInput(**data)

Bases: BaseModel

Input for vision-language model generation.

Attributes:

Name Type Description
prompt str | list[dict[str, Any]]

Prompt for the generation. Can be a string or a list of dicts for chat templates.

images list[str | Path] | None

Images for the generation. Can be None if images are passed in the prompt.

max_new_tokens int

Maximum number of new tokens to generate.

temperature float

Temperature for the generation.

Source code in pydantic/main.py (constructor inherited from pydantic's BaseModel)
def __init__(self, /, **data: Any) -> None:
    """Build the model by parsing and validating the supplied keyword arguments.

    A [`ValidationError`][pydantic_core.ValidationError] is raised when the input data
    cannot be validated to form a valid model.

    `self` is positional-only so that a field may itself be named `self`.
    """
    # pytest and some other tools leave this function out of tracebacks when this local is set
    __tracebackhide__ = True
    result = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if result is self:
        # The validator handed back this same instance: nothing to report.
        return
    # Warn directly from this frame so `stacklevel=2` keeps pointing at the caller.
    warnings.warn(
        'A custom validator is returning a value other than `self`.\n'
        "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
        'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
        stacklevel=2,
    )

VLMModel(config)

Bases: InferenceModel

Base class for vision-language models.

Example
@register_model("my-vlm")
class MyVLM(VLMModel):
    """Example VLM subclass registered under the name ``"my-vlm"``."""

    def load_model(self):
        """Load the weights found at the ``"path"`` entry of the model params."""
        weights_path = self.config.model_params["path"]
        self.model = load_weights(weights_path)

    def predict(self, input: VLMInput) -> VLMOutput:
        """Generate text from the prompt and images carried by ``input``."""
        generated = self.model.generate(input.prompt, input.images)
        # `usage` and `generation_config` are left as `...` placeholders in this example.
        return VLMOutput(generated_text=generated, usage=..., generation_config=...)
Source code in pixano_inference/models/base.py (constructor inherited from the InferenceModel base class)
def __init__(self, config: ModelDeploymentConfig) -> None:
    """Initialize the model with its deployment configuration.

    The constructor only stores the configuration on the instance; it does
    no other work.

    Args:
        config: Model deployment configuration, kept on the private
            ``_config`` attribute.
    """
    # Keep a reference to the deployment config on a private attribute.
    self._config = config

predict(input) abstractmethod

Run vision-language generation.

Parameters:

Name Type Description Default
input VLMInput

VLM input with prompt, images, and generation parameters.

required

Returns:

Type Description
VLMOutput

VLM output with generated text, usage info, and generation config.

Source code in pixano_inference/models/vlm.py
@abstractmethod
def predict(self, input: VLMInput) -> VLMOutput:
    """Run vision-language generation.

    This method is abstract and has no implementation here; concrete models
    provide their own.

    Args:
        input: VLM input with prompt, images, and generation parameters
            (``max_new_tokens``, ``temperature``).

    Returns:
        VLM output with generated text (``generated_text``), usage info
        (``usage``), and generation config (``generation_config``).
    """

VLMOutput(**data)

Bases: BaseModel

Output for vision-language model generation.

Attributes:

Name Type Description
generated_text str

Generated text.

usage UsageInfo

Usage metadata.

generation_config dict[str, Any]

Configuration used for the generation.

Source code in pydantic/main.py (constructor inherited from pydantic's BaseModel)
def __init__(self, /, **data: Any) -> None:
    """Build the model by parsing and validating the supplied keyword arguments.

    A [`ValidationError`][pydantic_core.ValidationError] is raised when the input data
    cannot be validated to form a valid model.

    `self` is positional-only so that a field may itself be named `self`.
    """
    # pytest and some other tools leave this function out of tracebacks when this local is set
    __tracebackhide__ = True
    result = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if result is self:
        # The validator handed back this same instance: nothing to report.
        return
    # Warn directly from this frame so `stacklevel=2` keeps pointing at the caller.
    warnings.warn(
        'A custom validator is returning a value other than `self`.\n'
        "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
        'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
        stacklevel=2,
    )