pixano_inference.pytorch.yolov5

YOLOv5(size='s', model_id='', device='cuda')

Bases: InferenceModel

PyTorch Hub YOLOv5 Model

Attributes:

| Name | Type | Description |
|------|------|-------------|
| name | str | Model name |
| model_id | str | Model ID |
| device | str | Model GPU or CPU device |
| description | str | Model description |
| model | Module | PyTorch model |

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| size | str | Model size ("n", "s", "m", "x"). Defaults to "s". | 's' |
| model_id | str | Previously used ID, generate new ID if "". Defaults to "". | '' |
| device | str | Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda". | 'cuda' |

Source code in `pixano_inference/pytorch/yolov5.py`

```python
def __init__(
    self,
    size: str = "s",
    model_id: str = "",
    device: str = "cuda",
) -> None:
    """Initialize model

    Args:
        size (str, optional): Model size ("n", "s", "m", "x"). Defaults to "s".
        model_id (str, optional): Previously used ID, generate new ID if "". Defaults to "".
        device (str, optional): Model GPU or CPU device (e.g. "cuda", "cpu"). Defaults to "cuda".
    """

    super().__init__(
        name=f"YOLOv5{size}",
        model_id=model_id,
        device=device,
        description=f"From PyTorch Hub. YOLOv5 model, {size.upper()} backbone.",
    )

    # Model
    self.model = torch.hub.load(
        "ultralytics/yolov5",
        model=f"yolov5{size}",
        pretrained=True,
    )
    self.model.to(self.device)
```
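
A minimal instantiation sketch, assuming `YOLOv5` is imported from the module documented on this page; the CPU fallback is an illustrative assumption, not part of the class.

```python
# Minimal usage sketch (assumes YOLOv5 is importable from the module in the page title).
import torch

from pixano_inference.pytorch.yolov5 import YOLOv5

# Illustrative fallback: use the GPU when available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Loads the pretrained "s" backbone from PyTorch Hub; leaving model_id empty
# lets the model generate a new ID.
model = YOLOv5(size="s", model_id="", device=device)
```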

preannotate(batch, views, uri_prefix, threshold=0.0, prompt='')

Inference pre-annotation for a batch

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| batch | RecordBatch | Input batch | required |
| views | list[str] | Dataset views | required |
| uri_prefix | str | URI prefix for media files | required |
| threshold | float | Confidence threshold. Defaults to 0.0. | 0.0 |
| prompt | str | Annotation text prompt. Defaults to "". | '' |

Returns:

| Type | Description |
|------|-------------|
| list[dict] | Processed rows |

Source code in `pixano_inference/pytorch/yolov5.py`

```python
def preannotate(
    self,
    batch: pa.RecordBatch,
    views: list[str],
    uri_prefix: str,
    threshold: float = 0.0,
    prompt: str = "",
) -> list[dict]:
    """Inference pre-annotation for a batch

    Args:
        batch (pa.RecordBatch): Input batch
        views (list[str]): Dataset views
        uri_prefix (str): URI prefix for media files
        threshold (float, optional): Confidence threshold. Defaults to 0.0.
        prompt (str, optional): Annotation text prompt. Defaults to "".

    Returns:
        list[dict]: Processed rows
    """

    rows = []
    _ = prompt  # This model does not use prompts

    for view in views:
        # Preprocess image batch
        im_batch = []
        for x in range(batch.num_rows):
            im: Image = Image.from_dict(batch[view][x].as_py())
            im.uri_prefix = uri_prefix
            im_batch.append(im.as_pillow())

        # Inference
        outputs = self.model(im_batch)

        # Process model outputs
        for x, img, img_output in zip(
            range(batch.num_rows), im_batch, outputs.xyxy
        ):
            w, h = img.size
            rows.extend(
                [
                    {
                        "id": shortuuid.uuid(),
                        "item_id": batch["id"][x].as_py(),
                        "view_id": view,
                        "bbox": BBox.from_xyxy(
                            [coord.item() for coord in pred[0:4]],
                            confidence=pred[4].item(),
                        )
                        .normalize(h, w)
                        .to_dict(),
                        "category": coco_names_91(coco_ids_80to91(pred[5] + 1)),
                    }
                    for pred in img_output
                    if pred[4] > threshold
                ]
            )

    return rows
```
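
A hedged sketch of calling `preannotate`, assuming `model` was created as above and that `batch` is a `pa.RecordBatch` read from a Pixano dataset with an `id` column and an `image` view column holding `Image` dicts; the view name and media path are illustrative assumptions.

```python
# Hedged usage sketch: `batch` is assumed to be a pyarrow RecordBatch from a Pixano
# dataset, with an "id" column and an "image" view column of Image dicts.
rows = model.preannotate(
    batch,                        # pa.RecordBatch from the dataset
    views=["image"],              # view column(s) to run inference on (assumed name)
    uri_prefix="/path/to/media",  # prefix prepended to each image URI (illustrative)
    threshold=0.5,                # keep predictions with confidence above 0.5
)

# Each returned row is a dict with "id", "item_id", "view_id", a normalized "bbox"
# (carrying its confidence), and a COCO "category" name; `prompt` is ignored by this model.
```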