
Pre-Annotation

Context

It is common to use an object detection model to pre-annotate a dataset.

This tutorial walks you through this feature with two examples: YOLOv11 from Ultralytics, and Grounding DINO through Pixano Inference.

Using YOLOv11 from Ultralytics

In your environment, install Ultralytics:

pip install ultralytics

Set your paths

from pathlib import Path

library = Path("/path_to_pixano_library_dir")
media = Path("/path_to_pixano_media_dir")
dataset_dirname = "pixano_dataset_directory_name"  # as in library directory

Load your Pixano dataset

from pixano.datasets import Dataset
from pixano.features import Source

ds = Dataset(library / dataset_dirname, media_dir=media)

# Add YOLO source
if not ds.get_data("source", ids="src_yolo"):
    ds.add_data("source", [Source(id="src_yolo", name="yolo11n", kind="model")])

Utility function to create a Pixano BBox and its associated Entity.

It assumes your dataset was created with the EntityWithCategory custom Entity and one of the default workspaces provided in pixano.datasets.workspaces.

You can of course customize it to match your own dataset; a sketch of such a variant follows the function below.

from pixano.features import BBox, Entity
import shortuuid

class EntityWithCategory(Entity):
    category: str

def create_pixano_bbox_entity(pix_image, bbox_coords, score, category):
    """Create an Entity and its BBox annotation referencing the given Pixano image."""
    view_ref = {"id": pix_image.id, "name": "image"}
    entity = EntityWithCategory(
        id=shortuuid.uuid(),
        item_ref=pix_image.item_ref,
        view_ref=view_ref,
        category=category,
    )
    bbox = BBox(
        id=shortuuid.uuid(),
        item_ref=pix_image.item_ref,
        view_ref=view_ref,
        entity_ref={"id": entity.id, "name": "objects"},
        confidence=score,
        coords=bbox_coords,
        is_normalized=True,
        format="xyxy",
        source_ref={"id":"src_yolo", "name": "source"},
    )

    return entity, bbox
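As an illustration of such a customization, here is a hypothetical Entity variant with an extra attribute (the EntityWithAttributes name and its occluded field are ours, not part of Pixano), together with an example call to the function above using made-up values:

class EntityWithAttributes(Entity):
    category: str
    occluded: bool = False  # hypothetical extra attribute

# Example call with made-up normalized coordinates:
# entity, bbox = create_pixano_bbox_entity(image, [0.10, 0.20, 0.55, 0.80], 0.87, "person")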

Load the YOLOv11 model.

from ultralytics import YOLO

# Load a COCO-pretrained YOLO11n model
model = YOLO("yolo11n.pt")

Pre-annotate

from tqdm.auto import tqdm

new_entities = []
new_bboxes = []

images = ds.get_data("image")

for image in tqdm(images):
    results = model.predict(media / image.url, verbose=False)
    for res in results:
        for bbox, score, category in zip(
            res.boxes.xyxyn.tolist(),
            res.boxes.conf.tolist(),
            res.boxes.cls.tolist(),
        ):
            entity, pix_bbox = create_pixano_bbox_entity(image, bbox, score, res.names[int(category)])
            new_entities.append(entity)
            new_bboxes.append(pix_bbox)

ds.add_data("objects", new_entities)
ds.add_data("bboxes", new_bboxes)

print("Done")

Using Grounding DINO from IDEA-Research with Pixano Inference

We can use the Pixano Inference client as a convenient way to access models, as long as they are supported by a registered Pixano Inference provider.
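If Pixano Inference is not installed in your environment yet, install it first (assuming the package is published on PyPI as pixano-inference):

pip install pixano-inference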

Set your paths

from pathlib import Path

library = Path("/path_to_pixano_library_dir")
media = Path("/path_to_pixano_media_dir")
dataset_dirname = "pixano_dataset_directory_name"  # as in library directory

Load your Pixano dataset

from pixano.datasets import Dataset
from pixano.features import Source

ds = Dataset(library / dataset_dirname, media_dir=media)

# Add GroundingDINO source
if not ds.get_data("source", ids="src_gdino"):
    ds.add_data("source", [Source(id="src_gdino", name="GroundingDINO", kind="model")])

Utility function to create a Pixano BBox and its associated Entity.

It assumes your dataset was created with the EntityWithCategory custom Entity and one of the default workspaces provided in pixano.datasets.workspaces.

You can of course customize it to match your own dataset.

This is exactly the same function as in the previous example, except for the BBox is_normalized field, which is set to False to match Grounding DINO's output in absolute pixel coordinates (see the conversion sketch after the function).

from pixano.features import BBox, Entity
import shortuuid

class EntityWithCategory(Entity):
    category: str

def create_pixano_bbox_entity(pix_image, bbox_coords, score, category):
    """Create an Entity and its BBox annotation referencing the given Pixano image."""
    view_ref = {"id": pix_image.id, "name": "image"}
    entity = EntityWithCategory(
        id=shortuuid.uuid(),
        item_ref=pix_image.item_ref,
        view_ref=view_ref,
        category=category,
    )
    bbox = BBox(
        id=shortuuid.uuid(),
        item_ref=pix_image.item_ref,
        view_ref=view_ref,
        entity_ref={"id": entity.id, "name": "objects"},
        confidence=score,
        coords=bbox_coords,
        is_normalized=False,
        format="xyxy",
        source_ref={"id":"src_gdino", "name": "source"},
    )

    return entity, bbox
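If you prefer storing normalized coordinates here too, you can convert Grounding DINO's absolute xyxy boxes before calling the function and keep is_normalized=True. A minimal sketch (the normalize_xyxy helper is ours, not a Pixano or Pixano Inference API):

def normalize_xyxy(bbox_coords, width, height):
    # Convert absolute pixel xyxy coordinates into [0, 1] normalized xyxy.
    x1, y1, x2, y2 = bbox_coords
    return [x1 / width, y1 / height, x2 / width, y2 / height]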

Load the Grounding DINO model with Pixano Inference, using the transformers provider (Hugging Face).

from pixano_inference.providers.transformers import TransformersProvider
from pixano_inference.tasks import ImageTask
import torch

provider = TransformersProvider()
model = provider.load_model(
    "dino",
    ImageTask.ZERO_SHOT_DETECTION.value,
    torch.device("cuda") if torch.cuda.is_available() else "cpu",
    "IDEA-Research/grounding-dino-base"
)

Pre-annotate. We ask Grounding DINO to detect objects of the classes ["person", "car", "motorcycle"].

from pixano_inference.pydantic import ImageZeroShotDetectionOutput
from pixano_inference.utils.media import convert_string_to_image
from tqdm.auto import tqdm

new_entities = []
new_bboxes = []

images = ds.get_data("image")

for image in tqdm(images):
    image_zero_shot_detection_out: ImageZeroShotDetectionOutput = (
        model.image_zero_shot_detection(
            image=convert_string_to_image(media / image.url),
            classes=["person", "car", "motorcycle"],
            box_threshold=0.3,
            text_threshold=0.2,
        )
    )
    for bbox, score, category in zip(
        image_zero_shot_detection_out.boxes,
        image_zero_shot_detection_out.scores,
        image_zero_shot_detection_out.classes,
    ):
        entity, pix_bbox = create_pixano_bbox_entity(image, bbox, score, category)
        new_entities.append(entity)
        new_bboxes.append(pix_bbox)

ds.add_data("objects", new_entities)
ds.add_data("bboxes", new_bboxes)

print("Done")