
pixano_inference.github.groundingdino

GroundingDINO(checkpoint_path, config_path, model_id='', device='cuda')

Bases: InferenceModel

GroundingDINO Model

Attributes:

Name             Type    Description
name             str     Model name
model_id         str     Model ID
device           str     Model GPU or CPU device
description      str     Model description
model            Module  PyTorch model
checkpoint_path  Path    Model checkpoint path
config_path      Path    Model config path

Parameters:

Name             Type  Description                                                                            Default
checkpoint_path  Path  Model checkpoint path (download from https://github.com/IDEA-Research/GroundingDINO)   required
config_path      Path  Model config path (download from https://github.com/IDEA-Research/GroundingDINO)       required
model_id         str   Previously used ID; a new ID is generated if empty. Defaults to "".                    ''
device           str   Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".                      'cuda'
Source code in pixano_inference/github/groundingdino.py
def __init__(
    self,
    checkpoint_path: Path,
    config_path: Path,
    model_id: str = "",
    device: str = "cuda",
) -> None:
    """Initialize model

    Args:
        checkpoint_path (Path): Model checkpoint path (download from https://github.com/IDEA-Research/GroundingDINO)
        config_path (Path): Model config path (download from https://github.com/IDEA-Research/GroundingDINO)
        model_id (str, optional): Previously used ID; a new ID is generated if "". Defaults to "".
        device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
    """

    # Import GroundingDINO
    gd_inf = attempt_import(
        "groundingdino.util.inference",
        "groundingdino@git+https://github.com/IDEA-Research/GroundingDINO",
    )

    super().__init__(
        name="GroundingDINO",
        model_id=model_id,
        device=device,
        description="Fom GitHub, GroundingDINO model.",
    )

    # Model
    self.model = gd_inf.load_model(
        config_path.as_posix(),
        checkpoint_path.as_posix(),
    )
    self.model.to(self.device)
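
A minimal instantiation sketch; the local file names below are illustrative only, and both files must first be downloaded from https://github.com/IDEA-Research/GroundingDINO:

from pathlib import Path

from pixano_inference.github.groundingdino import GroundingDINO

# Hypothetical local paths to the downloaded checkpoint and config
model = GroundingDINO(
    checkpoint_path=Path("models/groundingdino_swint_ogc.pth"),
    config_path=Path("models/GroundingDINO_SwinT_OGC.py"),
    device="cuda",  # use "cpu" if no GPU is available
)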

preannotate(batch, views, uri_prefix, threshold=0.0, prompt='')

Inference pre-annotation for a batch

Parameters:

Name        Type         Description                               Default
batch       RecordBatch  Input batch                               required
views       list[str]    Dataset views                             required
uri_prefix  str          URI prefix for media files                required
threshold   float        Confidence threshold. Defaults to 0.0.    0.0
prompt      str          Annotation text prompt. Defaults to "".   ''

Returns:

Type        Description
list[dict]  Processed rows

Source code in pixano_inference/github/groundingdino.py
def preannotate(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
    threshold: float = 0.0,
    prompt: str = "",
) -> list[dict]:
    """Inference pre-annotation for a batch

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files
        threshold (float, optional): Confidence threshold. Defaults to 0.0.
        prompt (str, optional): Annotation text prompt. Defaults to "".

    Returns:
        list[dict]: Processed rows
    """

    rows = []

    # Import GroundingDINO
    gd_inf = attempt_import(
        "groundingdino.util.inference",
        "groundingdino@git+https://github.com/IDEA-Research/GroundingDINO",
    )

    for view in views:
        # Iterate manually
        for x in range(batch.num_rows):
            # Preprocess image
            im: Image = Image.from_dict(batch[view][x].as_py())
            im.uri_prefix = uri_prefix

            _, image = gd_inf.load_image(im.path.as_posix())

            # Inference
            bbox_tensor, logit_tensor, category_list = gd_inf.predict(
                model=self.model,
                image=image,
                caption=prompt,
                box_threshold=0.35,
                text_threshold=0.25,
            )

            # Convert bounding boxes from cxcywh (center x, center y,
            # width, height) to xywh (top-left x, y, width, height)
            bbox_tensor = box_convert(
                boxes=bbox_tensor, in_fmt="cxcywh", out_fmt="xywh"
            )
            bbox_list = [[coord.item() for coord in bbox] for bbox in bbox_tensor]

            # Process model outputs
            rows.extend(
                [
                    {
                        "id": shortuuid.uuid(),
                        "item_id": batch["id"][x].as_py(),
                        "view_id": view,
                        "bbox": BBox.from_xywh(
                            bbox_list[i],
                            confidence=logit_tensor[i].item(),
                        ).to_dict(),
                        "category": category_list[i],
                    }
                    for i in range(len(category_list))
                    if logit_tensor[i].item() > threshold
                ]
            )

    return rows
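
A hedged usage sketch for preannotate; loading the batch is not shown, since the record schema comes from Pixano's dataset format, and the view name and media prefix below are assumptions:

import pyarrow as pa

# `batch` is assumed to be a pyarrow.RecordBatch from a Pixano dataset,
# with an "id" column and one column per view holding Image records
batch: pa.RecordBatch = ...  # loading from a Pixano dataset not shown

rows = model.preannotate(
    batch=batch,
    views=["image"],                # assumed name of the image view column
    uri_prefix="/datasets/media",   # assumed prefix for relative media URIs
    threshold=0.3,                  # keep detections with confidence above 0.3
    prompt="dog . cat . person",    # categories are typically separated by " . "
)
# Each returned row holds: id, item_id, view_id, bbox (xywh + confidence), category

Note that GroundingDINO predicts boxes in normalized cxcywh format (center x, center y, width, height); preannotate converts them to xywh, so a prediction of (cx=0.5, cy=0.5, w=0.2, h=0.4) is returned as (x=0.4, y=0.3, w=0.2, h=0.4).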