Skip to content

pixano_inference.github.mobile_sam

MobileSAM(checkpoint_path, model_id='', device='cpu')

Bases: InferenceModel

MobileSAM

Attributes:

Name Type Description
name str

Model name

model_id str

Model ID

device str

Model GPU or CPU device (e.g. "cuda", "cpu")

description str

Model description

model Module

MobileSAM model

checkpoint_path Path

Model checkpoint path

Parameters:

Name Type Description Default
checkpoint_path Path

Model checkpoint path.

required
model_id str

Previously used ID, generate new ID if "". Defaults to "".

''
device str

Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cpu".

'cpu'
Source code in pixano_inference/github/mobile_sam.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def __init__(
    self,
    checkpoint_path: Path,
    model_id: str = "",
    device: str = "cpu",
) -> None:
    """Set up the MobileSAM model from a checkpoint

    Args:
        checkpoint_path (Path): Path of the model checkpoint to load.
        model_id (str, optional): Previously used ID, a new ID is generated if "". Defaults to "".
        device (str, optional): Device the model is moved to (e.g. "cuda", "cpu"). Defaults to "cpu".
    """

    # Resolve the mobile_sam package through attempt_import
    sam_package = attempt_import(
        "mobile_sam", "mobile-sam@git+https://github.com/ChaoningZhang/MobileSAM"
    )

    # Name, ID, device and description are forwarded to the parent initializer
    base_kwargs = {
        "name": "Mobile_SAM",
        "model_id": model_id,
        "device": device,
        "description": "From GitHub. MobileSAM, ViT-T backbone.",
    }
    super().__init__(**base_kwargs)

    # Build the "vit_t" entry of the registry from the checkpoint,
    # then move the resulting model to the configured device
    build_vit_t = sam_package.sam_model_registry["vit_t"]
    self.model = build_vit_t(checkpoint=checkpoint_path)
    self.model.to(device=self.device)

    # Remember where the weights came from (read again by export_to_onnx)
    self.checkpoint_path = checkpoint_path

export_to_onnx(library_dir)

Export Torch model to ONNX

Parameters:

Name Type Description Default
library_dir Path

Dataset library directory

required
Source code in pixano_inference/github/mobile_sam.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def export_to_onnx(self, library_dir: Path):
    """Export Torch model to ONNX

    The ONNX file is written to ``library_dir / "models"`` under the checkpoint
    file name with its final extension replaced by ".onnx", and is then
    quantized in place. The model is moved to CPU for the export and moved
    back to ``self.device`` afterwards, even if export or quantization fails.

    Args:
        library_dir (Path): Dataset library directory
    """

    # Import MobileSAM
    mobile_sam = attempt_import(
        "mobile_sam", "mobile-sam@git+https://github.com/ChaoningZhang/MobileSAM"
    )

    # Model directory
    model_dir = library_dir / "models"
    model_dir.mkdir(parents=True, exist_ok=True)

    # Swap only the final extension: a plain str.replace(".pt", ".onnx") would
    # turn "model.pth" into "model.onnxh" and rewrite any ".pt" inside the name
    onnx_path = model_dir / Path(self.checkpoint_path).with_suffix(".onnx").name

    # Put model to CPU for export
    self.model.to("cpu")

    try:
        # Export settings
        onnx_model = mobile_sam.utils.onnx.SamOnnxModel(
            self.model, return_single_mask=True
        )
        # Only the number of prompt points is left dynamic in the exported graph
        dynamic_axes = {
            "point_coords": {1: "num_points"},
            "point_labels": {1: "num_points"},
        }
        embed_dim = self.model.prompt_encoder.embed_dim
        embed_size = self.model.prompt_encoder.image_embedding_size
        mask_input_size = [4 * x for x in embed_size]
        # Dummy tensors used for tracing; dict order defines the ONNX input order
        dummy_inputs = {
            "image_embeddings": torch.randn(
                1, embed_dim, *embed_size, dtype=torch.float
            ),
            "point_coords": torch.randint(
                low=0, high=1024, size=(1, 5, 2), dtype=torch.float
            ),
            "point_labels": torch.randint(
                low=0, high=4, size=(1, 5), dtype=torch.float
            ),
            "mask_input": torch.randn(1, 1, *mask_input_size, dtype=torch.float),
            "has_mask_input": torch.tensor([1], dtype=torch.float),
            "orig_im_size": torch.tensor([1500, 2250], dtype=torch.float),
        }
        output_names = ["masks", "iou_predictions", "low_res_masks"]

        # Export model
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=torch.jit.TracerWarning)
            warnings.filterwarnings("ignore", category=UserWarning)
            with open(onnx_path, "wb") as f:
                torch.onnx.export(
                    onnx_model,
                    tuple(dummy_inputs.values()),
                    f,
                    export_params=True,
                    verbose=False,
                    opset_version=17,
                    do_constant_folding=True,
                    input_names=list(dummy_inputs.keys()),
                    output_names=output_names,
                    dynamic_axes=dynamic_axes,
                )

        # Quantize model (the exported file is overwritten by its quantized version)
        quantize_dynamic(
            model_input=onnx_path,
            model_output=onnx_path,
            optimize_model=True,
            per_channel=False,
            reduce_range=False,
            weight_type=QuantType.QUInt8,
        )
    finally:
        # Put model back to device after export, including when export failed,
        # so the instance is never left on CPU unexpectedly
        self.model.to(self.device)

preannotate(batch, views, uri_prefix, threshold=0.0, prompt='')

Inference pre-annotation for a batch

Parameters:

Name Type Description Default
batch RecordBatch

Input batch

required
views list[str]

Dataset views

required
uri_prefix str

URI prefix for media files

required
threshold float

Confidence threshold. Defaults to 0.0.

0.0
prompt str

Annotation text prompt. Defaults to "".

''

Returns:

Type Description
list[dict]

Processed rows

Source code in pixano_inference/github/mobile_sam.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def preannotate(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
    threshold: float = 0.0,
    prompt: str = "",
) -> list[dict]:
    """Inference pre-annotation for a batch

    Runs MobileSAM automatic mask generation on every image of every view and
    keeps the masks whose predicted IoU is strictly above ``threshold``.

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files
        threshold (float, optional): Confidence threshold. Defaults to 0.0.
        prompt (str, optional): Annotation text prompt. Defaults to "".

    Returns:
        list[dict]: Processed rows, one per kept mask, with keys "id", "item_id",
            "view_id", "bbox" and "mask"
    """

    # Import MobileSAM
    mobile_sam = attempt_import(
        "mobile_sam", "mobile-sam@git+https://github.com/ChaoningZhang/MobileSAM"
    )

    rows = []
    _ = prompt  # This model does not use prompts

    # The generator only depends on the model, so build it once for the whole
    # batch instead of once per image
    generator = mobile_sam.SamAutomaticMaskGenerator(self.model)

    for view in views:
        # Iterate manually
        for x in range(batch.num_rows):
            # Preprocess image
            image = Image.from_dict(batch[view][x].as_py())
            image.uri_prefix = uri_prefix
            # as_cv2() output is converted from BGR to RGB before inference
            im = cv2.cvtColor(image.as_cv2(), cv2.COLOR_BGR2RGB)

            # Inference
            with torch.no_grad():
                output = generator.generate(im)

            # Process model outputs
            h, w = im.shape[:2]
            item_id = batch["id"][x].as_py()
            rows.extend(
                {
                    "id": shortuuid.uuid(),
                    "item_id": item_id,
                    "view_id": view,
                    "bbox": BBox.from_xywh(
                        [int(coord) for coord in annotation["bbox"]],
                        confidence=float(annotation["predicted_iou"]),
                    )
                    .normalize(h, w)
                    .to_dict(),
                    "mask": CompressedRLE.from_mask(
                        annotation["segmentation"]
                    ).to_dict(),
                }
                for annotation in output
                if annotation["predicted_iou"] > threshold
            )

    return rows

precompute_embeddings(batch, views, uri_prefix)

Embedding precomputing for a batch

Parameters:

Name Type Description Default
batch RecordBatch

Input batch

required
views list[str]

Dataset views

required
uri_prefix str

URI prefix for media files

required

Returns:

Type Description
list[dict]

Embedding rows

Source code in pixano_inference/github/mobile_sam.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
def precompute_embeddings(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
) -> list[dict]:
    """Embedding precomputing for a batch

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files

    Returns:
        list[dict]: Embedding rows, one per batch row, each holding the row "id"
            and, for every view, the image embedding serialized with np.save as bytes
    """

    # Import MobileSAM
    mobile_sam = attempt_import(
        "mobile_sam", "mobile-sam@git+https://github.com/ChaoningZhang/MobileSAM"
    )

    # One output row per input row, filled view by view below
    rows = [
        {
            "id": batch["id"][x].as_py(),
        }
        for x in range(batch.num_rows)
    ]

    # The predictor only depends on the model, so build it once for the whole
    # batch instead of once per image; set_image replaces the current image
    predictor = mobile_sam.SamPredictor(self.model)

    for view in views:
        # Iterate manually
        for x in range(batch.num_rows):
            # Preprocess image
            image = Image.from_dict(batch[view][x].as_py())
            image.uri_prefix = uri_prefix
            # as_cv2() output is converted from BGR to RGB before inference
            im = cv2.cvtColor(image.as_cv2(), cv2.COLOR_BGR2RGB)

            # Inference
            with torch.no_grad():
                predictor.set_image(im)
                img_embedding = predictor.get_image_embedding().cpu().numpy()

            # Process model outputs: store the array in .npy format as raw bytes
            emb_bytes = BytesIO()
            np.save(emb_bytes, img_embedding)
            rows[x][view] = emb_bytes.getvalue()

    return rows