
pixano_inference.github.groundingdino

GroundingDINO(checkpoint_path, config_path, model_id='', device='cuda')

Bases: InferenceModel

GroundingDINO Model

Attributes:

Name             Type    Description
name             str     Model name
model_id         str     Model ID
device           str     Model GPU or CPU device
description      str     Model description
model            Module  PyTorch model
checkpoint_path  Path    Model checkpoint path
config_path      Path    Model config path

Parameters:

Name             Type  Description                                                                            Default
checkpoint_path  Path  Model checkpoint path (download from https://github.com/IDEA-Research/GroundingDINO)   required
config_path      Path  Model config path (download from https://github.com/IDEA-Research/GroundingDINO)       required
model_id         str   Previously used ID; a new ID is generated if empty. Defaults to "".                    ''
device           str   Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".                      'cuda'
Source code in pixano_inference/github/groundingdino.py
def __init__(
    self,
    checkpoint_path: Path,
    config_path: Path,
    model_id: str = "",
    device: str = "cuda",
) -> None:
    """Initialize model

    Args:
        checkpoint_path (Path): Model checkpoint path (download from https://github.com/IDEA-Research/GroundingDINO)
        config_path (Path): Model config path (download from https://github.com/IDEA-Research/GroundingDINO)
        model_id (str, optional): Previously used ID; a new ID is generated if "". Defaults to "".
        device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
    """

    # Import GroundingDINO
    gd_inf = attempt_import(
        "groundingdino.util.inference",
        "groundingdino@git+https://github.com/IDEA-Research/GroundingDINO",
    )

    super().__init__(
        name="GroundingDINO",
        model_id=model_id,
        device=device,
        description="Fom GitHub, GroundingDINO model.",
    )

    # Model
    self.model = gd_inf.load_model(
        config_path.as_posix(),
        checkpoint_path.as_posix(),
    )
    self.model.to(self.device)
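
A minimal instantiation sketch; the local file names below are illustrative only, and both files must first be downloaded from https://github.com/IDEA-Research/GroundingDINO:

from pathlib import Path

from pixano_inference.github.groundingdino import GroundingDINO

# Hypothetical local paths to the downloaded checkpoint and config
model = GroundingDINO(
    checkpoint_path=Path("models/groundingdino_swint_ogc.pth"),
    config_path=Path("models/GroundingDINO_SwinT_OGC.py"),
    device="cuda",  # use "cpu" if no GPU is available
)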

preannotate(batch, views, uri_prefix, threshold=0.0, prompt='')

Inference pre-annotation for a batch

Parameters:

Name        Type         Description                               Default
batch       RecordBatch  Input batch                               required
views       list[str]    Dataset views                             required
uri_prefix  str          URI prefix for media files                required
threshold   float        Confidence threshold. Defaults to 0.0.    0.0
prompt      str          Annotation text prompt. Defaults to "".   ''

Returns:

Type        Description
list[dict]  Processed rows

Source code in pixano_inference/github/groundingdino.py
def preannotate(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
    threshold: float = 0.0,
    prompt: str = "",
) -> list[dict]:
    """Inference pre-annotation for a batch

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files
        threshold (float, optional): Confidence threshold. Defaults to 0.0.
        prompt (str, optional): Annotation text prompt. Defaults to "".

    Returns:
        list[dict]: Processed rows
    """

    rows = []

    # Import GroundingDINO
    gd_inf = attempt_import(
        "groundingdino.util.inference",
        "groundingdino@git+https://github.com/IDEA-Research/GroundingDINO",
    )

    for view in views:
        # Iterate manually
        for x in range(batch.num_rows):
            # Preprocess image
            im: Image = Image.from_dict(batch[view][x].as_py())
            im.uri_prefix = uri_prefix

            _, image = gd_inf.load_image(im.path.as_posix())

            # Inference
            bbox_tensor, logit_tensor, category_list = gd_inf.predict(
                model=self.model,
                image=image,
                caption=prompt,
                box_threshold=0.35,
                text_threshold=0.25,
            )

            # Convert bounding boxes from cxcywh (center x, center y,
            # width, height) to xywh (top-left x, y, width, height)
            bbox_tensor = box_convert(
                boxes=bbox_tensor, in_fmt="cxcywh", out_fmt="xywh"
            )
            bbox_list = [[coord.item() for coord in bbox] for bbox in bbox_tensor]

            # Process model outputs
            rows.extend(
                [
                    {
                        "id": shortuuid.uuid(),
                        "item_id": batch["id"][x].as_py(),
                        "view_id": view,
                        "bbox": BBox.from_xywh(
                            bbox_list[i],
                            confidence=logit_tensor[i].item(),
                        ).to_dict(),
                        "category": category_list[i],
                    }
                    for i in range(len(category_list))
                    if logit_tensor[i].item() > threshold
                ]
            )

    return rows
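
A hedged usage sketch for preannotate; loading the batch is not shown, since the record schema comes from Pixano's dataset format, and the view name and media prefix below are assumptions:

import pyarrow as pa

# `batch` is assumed to be a pyarrow.RecordBatch from a Pixano dataset,
# with an "id" column and one column per view holding Image records
batch: pa.RecordBatch = ...  # loading from a Pixano dataset not shown

rows = model.preannotate(
    batch=batch,
    views=["image"],                # assumed name of the image view column
    uri_prefix="/datasets/media",   # assumed prefix for relative media URIs
    threshold=0.3,                  # keep detections with confidence above 0.3
    prompt="dog . cat . person",    # categories are typically separated by " . "
)
# Each returned row holds: id, item_id, view_id, bbox (xywh + confidence), category

Note that GroundingDINO predicts boxes in normalized cxcywh format (center x, center y, width, height); preannotate converts them to xywh, so a prediction of (cx=0.5, cy=0.5, w=0.2, h=0.4) is returned as (x=0.4, y=0.3, w=0.2, h=0.4).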