pixano_inference.pytorch.maskrcnnv2

MaskRCNNv2(model_id='', device='cuda')

Bases: InferenceModel

PyTorch Hub MaskRCNNv2 Model

Attributes:

    name (str): Model name
    model_id (str): Model ID
    device (str): Model GPU or CPU device
    description (str): Model description
    model (Module): PyTorch model
    transforms (Module): PyTorch preprocessing transforms

Parameters:

    model_id (str, optional): Previously used ID; a new ID is generated if "". Defaults to "".
    device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
Source code in pixano_inference/pytorch/maskrcnnv2.py
def __init__(
    self,
    model_id: str = "",
    device: str = "cuda",
) -> None:
    """Initialize model

    Args:
        model_id (str, optional): Previously used ID, generate new ID if "". Defaults to "".
        device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
    """

    super().__init__(
        name="MaskRCNNv2",
        model_id=model_id,
        device=device,
        description="From PyTorch Hub. MaskRCNN, ResNet-50-FPN v2 Backbone, COCO_V1 Weights.",
    )

    # Model
    self.model = maskrcnn_resnet50_fpn_v2(
        weights=MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1
    )
    self.model.eval()
    self.model.to(self.device)

    # Transforms
    self.transforms = MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1.transforms()
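
A minimal usage sketch (the import path is assumed from the source file shown above): instantiating the model. Leaving model_id empty makes the constructor generate a new ID.

    from pixano_inference.pytorch.maskrcnnv2 import MaskRCNNv2

    # model_id is left empty, so a new ID is generated; use device="cuda" on a GPU machine
    model = MaskRCNNv2(device="cpu")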

preannotate(batch, views, uri_prefix, threshold=0.0, prompt='')

Inference pre-annotation for a batch

Parameters:

    batch (pa.RecordBatch): Input batch
    views (list[str]): Dataset views
    uri_prefix (str): URI prefix for media files
    threshold (float, optional): Confidence threshold. Defaults to 0.0.
    prompt (str, optional): Annotation text prompt. Defaults to "".

Returns:

    list[dict]: Processed rows

Source code in pixano_inference/pytorch/maskrcnnv2.py
def preannotate(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
    threshold: float = 0.0,
    prompt: str = "",
) -> list[dict]:
    """Inference pre-annotation for a batch

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files
        threshold (float, optional): Confidence threshold. Defaults to 0.0.
        prompt (str, optional): Annotation text prompt. Defaults to "".

    Returns:
        list[dict]: Processed rows
    """

    rows = []
    _ = prompt  # This model does not use prompts

    for view in views:
        # PyTorch Transforms don't support different-sized image batches, so iterate manually
        for x in range(batch.num_rows):
            # Preprocess image
            im: Image = Image.from_dict(batch[view][x].as_py())
            im.uri_prefix = uri_prefix
            im = im.as_pillow()
            im_tensor = self.transforms(im).unsqueeze(0).to(self.device)

            # Inference
            with torch.no_grad():
                output = self.model(im_tensor)[0]

            # Process model outputs
            w, h = im.size
            rows.extend(
                [
                    {
                        "id": shortuuid.uuid(),
                        "item_id": batch["id"][x].as_py(),
                        "view_id": view,
                        "bbox": BBox.from_xyxy(
                            [coord.item() for coord in output["boxes"][i]],
                            confidence=output["scores"][i].item(),
                        )
                        .normalize(h, w)
                        .to_dict(),
                        "mask": CompressedRLE.from_mask(
                            unmold_mask(output["masks"][i])
                        ).to_dict(),
                        "category": coco_names_91(output["labels"][i]),
                    }
                    for i in range(len(output["scores"]))
                    if output["scores"][i] > threshold
                ]
            )

    return rows
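
A hedged usage sketch: the batch schema (an "id" column plus one column per view holding serialized pixano images) comes from a pixano dataset and is assumed here, not defined by this page.

    model = MaskRCNNv2(device="cpu")

    # batch is assumed to be a pa.RecordBatch with an "id" column and an "image" view column
    rows = model.preannotate(
        batch=batch,
        views=["image"],
        uri_prefix="/path/to/media",
        threshold=0.5,  # keep only detections scoring above 0.5
    )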

unmold_mask(mask, threshold=0.5)

Convert mask from torch.Tensor to np.ndarray, squeeze a dimension if needed, and threshold values

Parameters:

    mask (torch.Tensor): Mask (1, H, W)
    threshold (float, optional): Confidence threshold. Defaults to 0.5.

Returns:

    np.ndarray: Mask (H, W)

Source code in pixano_inference/pytorch/maskrcnnv2.py
def unmold_mask(mask: torch.Tensor, threshold: float = 0.5) -> np.ndarray:
    """Convert mask from torch.Tensor to np.ndarray, squeeze a dimension if needed, and threshold values

    Args:
        mask (torch.Tensor): Mask (1, H, W)
        threshold (float, optional): Confidence threshold. Defaults to 0.5.

    Returns:
        np.ndarray: Mask (H, W)
    """

    # Detach from the graph, move to CPU, and convert to NumPy
    mask = mask.detach().cpu().numpy()

    # Squeeze dimension if needed
    if 1 in mask.shape:
        mask = mask.squeeze(mask.shape.index(1))

    # Threshold values
    mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)

    return mask
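
A quick illustration on a synthetic tensor (not from the library): unmold_mask drops the size-1 channel dimension and binarizes the soft mask.

    import torch

    from pixano_inference.pytorch.maskrcnnv2 import unmold_mask

    soft_mask = torch.rand(1, 480, 640)  # one soft mask with values in [0, 1]
    binary = unmold_mask(soft_mask, threshold=0.5)
    print(binary.shape, binary.dtype)  # (480, 640) uint8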