Pre Annotation
Context
It is common to use a object detection model to pre-annotate a dataset.
This tutorial will help you unlock this feature.
Using YOLOv11 from Ultralytics
In your environnement, install ultralytics
Set your paths
from pathlib import Path
library = Path("/path_to_pixano_library_dir")
media = Path("/path_to_pixano_media_dir")
dataset_dirname = "pixano_dataset_directory_name" # as in library directory
Load your Pixano dataset
from pixano.datasets import Dataset
ds = Dataset(library / dataset_dirname, media_dir=media)
# Add YOLO source
if not ds.get_data("source", ids="src_yolo"):
ds.add_data("source", [Source(id="src_yolo", name="yolo11n", kind="model")])
Utility function to create pixano BBox and associated Entity.
It assume your dataset has been created with the EntityWithCategory custom Entity, and one of the Default provided in pixano.datasets.workspaces.
Of course you can customize it to match your own dataset.
from pixano.features import BBox, Entity
import shortuuid
class EntityWithCategory(Entity):
category: str
def create_pixano_bbox_entity(pix_image, bbox_coords, score, category):
view_ref = {"id": pix_image.id, "name": "image"}
entity = EntityWithCategory(
id=shortuuid.uuid(),
item_ref=pix_image.item_ref,
view_ref=view_ref,
category=category,
)
bbox = BBox(
id=shortuuid.uuid(),
item_ref=pix_image.item_ref,
view_ref=view_ref,
entity_ref={"id": entity.id, "name": "objects"},
confidence=score,
coords=bbox_coords,
is_normalized=True,
format="xyxy",
source_ref={"id":"src_yolo", "name": "source"},
)
return entity, bbox
Load YOLOv11 model.
Pre-annotate
from tqdm.auto import tqdm
new_entities = []
new_bboxes = []
images = ds.get_data("image")
for image in tqdm(images):
results = model.predict(media / image.url, verbose=False)
for res in results:
for bbox, score, category in zip(
res.boxes.xyxyn.tolist(),
res.boxes.conf.tolist(),
res.boxes.cls.tolist(),
):
entity, pix_bbox = create_pixano_bbox_entity(image, bbox, score, res.names[category])
new_entities.append(entity)
new_bboxes.append(pix_bbox)
ds.add_data("objects", new_entities)
ds.add_data("bboxes", new_bboxes)
print("Done")
Using Grounding DINO from IDEA-Research with Pixano Inference
We can use Pixano Inference client as a convenient way to access models, as long as they have a registered inference provider supported by Pixano Inference.
Set your paths
from pathlib import Path
library = Path("/path_to_pixano_library_dir")
media = Path("/path_to_pixano_media_dir")
dataset_dirname = "pixano_dataset_directory_name" # as in library directory
Load your Pixano dataset
from pixano.datasets import Dataset
ds = Dataset(library / dataset_dirname, media_dir=media)
# Add GroundingDINO source
if not ds.get_data("source", ids="src_gdino"):
ds.add_data("source", [Source(id="src_gdino", name="GroundingDINO", kind="model")])
Utility function to create pixano BBox and associated Entity.
It assume your dataset has been created with the EntityWithCategory custom Entity, and one of the Default provided in pixano.datasets.workspaces.
Of course you can customize it to match your own dataset.
This is exactly the same function as in the previous sample, except for BBox is_normalized field, to match Grounding DINO output.
from pixano.features import BBox, Entity
import shortuuid
class EntityWithCategory(Entity):
category: str
def create_pixano_bbox_entity(pix_image, bbox_coords, score, category):
view_ref = {"id": pix_image.id, "name": "image"}
entity = EntityWithCategory(
id=shortuuid.uuid(),
item_ref=pix_image.item_ref,
view_ref=view_ref,
category=category,
)
bbox = BBox(
id=shortuuid.uuid(),
item_ref=pix_image.item_ref,
view_ref=view_ref,
entity_ref={"id": entity.id, "name": "objects"},
confidence=score,
coords=bbox_coords,
is_normalized=False,
format="xyxy",
source_ref={"id":"src_gdino", "name": "source"},
)
return entity, bbox
Load Grounding DINO model with Pixano Inference from transformers provider (HuggingFace)
from pixano_inference.providers.transformers import TransformersProvider
from pixano_inference.tasks import ImageTask
import torch
provider = TransformersProvider()
model = provider.load_model(
"dino",
ImageTask.ZERO_SHOT_DETECTION.value,
torch.device("cuda") if torch.cuda.is_available() else "cpu",
"IDEA-Research/grounding-dino-base"
)
Pre-annotate. We ask Grounding DINO to detect objects of classes ["person", "car", "motorcycle"].
from pixano_inference.pydantic import ImageZeroShotDetectionOutput
from pixano_inference.utils.media import convert_string_to_image
from tqdm.auto import tqdm
new_entities = []
new_bboxes = []
images = ds.get_data("image")
for image in tqdm(images):
image_zero_shot_detection_out: ImageZeroShotDetectionOutput = (
model.image_zero_shot_detection(
image=convert_string_to_image(media / image.url),
classes=["person", "car", "motorcycle"],
box_threshold=0.3,
text_threshold=0.2,
)
)
for bbox, score, category in zip(
image_zero_shot_detection_out.boxes,
image_zero_shot_detection_out.scores,
image_zero_shot_detection_out.classes,
):
entity, pix_bbox = create_pixano_bbox_entity(image, bbox, score, category)
new_entities.append(entity)
new_bboxes.append(pix_bbox)
ds.add_data("objects", new_entities)
ds.add_data("bboxes", new_bboxes)
print("Done")