pixano_inference.pytorch.maskrcnnv2

MaskRCNNv2(model_id='', device='cuda')

Bases: InferenceModel

PyTorch Hub MaskRCNNv2 Model

Attributes:

    name (str): Model name
    model_id (str): Model ID
    device (str): Model GPU or CPU device
    description (str): Model description
    model (Module): PyTorch model
    transforms (Module): PyTorch preprocessing transforms

Parameters:

    model_id (str, optional): Previously used ID; a new ID is generated if "". Defaults to "".
    device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
Source code in pixano_inference/pytorch/maskrcnnv2.py
def __init__(
    self,
    model_id: str = "",
    device: str = "cuda",
) -> None:
    """Initialize model

    Args:
        model_id (str, optional): Previously used ID, generate new ID if "". Defaults to "".
        device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
    """

    super().__init__(
        name="MaskRCNNv2",
        model_id=model_id,
        device=device,
        description="From PyTorch Hub. MaskRCNN, ResNet-50-FPN v2 Backbone, COCO_V1 Weights.",
    )

    # Model
    self.model = maskrcnn_resnet50_fpn_v2(
        weights=MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1
    )
    self.model.eval()
    self.model.to(self.device)

    # Transforms
    self.transforms = MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1.transforms()
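
A minimal usage sketch (the import path is assumed from the source file shown above): instantiating the model. Leaving model_id empty makes the constructor generate a new ID.

    from pixano_inference.pytorch.maskrcnnv2 import MaskRCNNv2

    # model_id is left empty, so a new ID is generated; use device="cuda" on a GPU machine
    model = MaskRCNNv2(device="cpu")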

preannotate(batch, views, uri_prefix, threshold=0.0, prompt='')

Inference pre-annotation for a batch

Parameters:

    batch (pa.RecordBatch): Input batch
    views (list[str]): Dataset views
    uri_prefix (str): URI prefix for media files
    threshold (float, optional): Confidence threshold. Defaults to 0.0.
    prompt (str, optional): Annotation text prompt. Defaults to "".

Returns:

    list[dict]: Processed rows

Source code in pixano_inference/pytorch/maskrcnnv2.py
def preannotate(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
    threshold: float = 0.0,
    prompt: str = "",
) -> list[dict]:
    """Inference pre-annotation for a batch

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files
        threshold (float, optional): Confidence threshold. Defaults to 0.0.
        prompt (str, optional): Annotation text prompt. Defaults to "".

    Returns:
        list[dict]: Processed rows
    """

    rows = []
    _ = prompt  # This model does not use prompts

    for view in views:
        # PyTorch Transforms don't support different-sized image batches, so iterate manually
        for x in range(batch.num_rows):
            # Preprocess image
            im: Image = Image.from_dict(batch[view][x].as_py())
            im.uri_prefix = uri_prefix
            im = im.as_pillow()
            im_tensor = self.transforms(im).unsqueeze(0).to(self.device)

            # Inference
            with torch.no_grad():
                output = self.model(im_tensor)[0]

            # Process model outputs
            w, h = im.size
            rows.extend(
                [
                    {
                        "id": shortuuid.uuid(),
                        "item_id": batch["id"][x].as_py(),
                        "view_id": view,
                        "bbox": BBox.from_xyxy(
                            [coord.item() for coord in output["boxes"][i]],
                            confidence=output["scores"][i].item(),
                        )
                        .normalize(h, w)
                        .to_dict(),
                        "mask": CompressedRLE.from_mask(
                            unmold_mask(output["masks"][i])
                        ).to_dict(),
                        "category": coco_names_91(output["labels"][i]),
                    }
                    for i in range(len(output["scores"]))
                    if output["scores"][i] > threshold
                ]
            )

    return rows
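
A hedged usage sketch: the batch schema (an "id" column plus one column per view holding serialized pixano images) comes from a pixano dataset and is assumed here, not defined by this page.

    model = MaskRCNNv2(device="cpu")

    # batch is assumed to be a pa.RecordBatch with an "id" column and an "image" view column
    rows = model.preannotate(
        batch=batch,
        views=["image"],
        uri_prefix="/path/to/media",
        threshold=0.5,  # keep only detections scoring above 0.5
    )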

unmold_mask(mask, threshold=0.5)

Convert mask from torch.Tensor to np.ndarray, squeeze a dimension if needed, and threshold values

Parameters:

    mask (torch.Tensor): Mask (1, H, W)
    threshold (float, optional): Confidence threshold. Defaults to 0.5.

Returns:

    np.ndarray: Mask (H, W)

Source code in pixano_inference/pytorch/maskrcnnv2.py
def unmold_mask(mask: torch.Tensor, threshold: float = 0.5) -> np.ndarray:
    """Convert mask from torch.Tensor to np.ndarray, squeeze a dimension if needed, and threshold values

    Args:
        mask (torch.Tensor): Mask (1, H, W)
        threshold (float, optional): Confidence threshold. Defaults to 0.5.

    Returns:
        np.ndarray: Mask (H, W)
    """

    # Detach from the graph, move to CPU, and convert to NumPy
    mask = mask.detach().cpu().numpy()

    # Squeeze dimension if needed
    if 1 in mask.shape:
        mask = mask.squeeze(mask.shape.index(1))

    # Threshold values
    mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)

    return mask
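
A quick illustration on a synthetic tensor (not from the library): unmold_mask drops the size-1 channel dimension and binarizes the soft mask.

    import torch

    from pixano_inference.pytorch.maskrcnnv2 import unmold_mask

    soft_mask = torch.rand(1, 480, 640)  # one soft mask with values in [0, 1]
    binary = unmold_mask(soft_mask, threshold=0.5)
    print(binary.shape, binary.dtype)  # (480, 640) uint8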